{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 14082, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010651896037494673, "grad_norm": 127.65389567139255, "learning_rate": 4.2553191489361695e-09, "loss": 0.5528, "step": 5, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 1.0, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.science": 0.5, "success_rate.epoch.env_macro_mean": 0.9, "success_rate.epoch.global": 0.9, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982850609756098, "tokens_p.mean_in_band": 0.67109375, "tokens_rate.above_band": 0.9949443882709808, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005055611729019211 }, { "epoch": 0.0021303792074989347, "grad_norm": 182.45480532526113, "learning_rate": 9.574468085106382e-09, "loss": 0.839, "step": 10, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.75, "success_rate.epoch.env.math": 0.8333333333333334, "success_rate.epoch.env.science": 0.3333333333333333, "success_rate.epoch.env_macro_mean": 0.8194444444444443, "success_rate.epoch.global": 0.8, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9984365348980853, "tokens_p.mean_in_band": 0.5885416666666666, "tokens_rate.above_band": 0.9729567307692307, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027043269230769232 }, { "epoch": 0.0031955688112484024, "grad_norm": 143.2525098677747, "learning_rate": 1.4893617021276594e-08, "loss": 1.1298, "step": 15, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.75, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.science": 0.25, "success_rate.epoch.env_macro_mean": 0.8428571428571429, "success_rate.epoch.global": 0.7333333333333333, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.7333333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9891447368421052, "tokens_p.mean_below_band": 3.841705620288849e-09, "tokens_p.mean_in_band": 0.3806046195652174, "tokens_rate.above_band": 0.7983193277310925, "tokens_rate.below_band": 0.008403361344537815, "tokens_rate.in_band": 0.19327731092436976 }, { "epoch": 0.004260758414997869, "grad_norm": 102.603002394448, "learning_rate": 2.0212765957446807e-08, "loss": 0.867, "step": 20, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.8461538461538461, "success_rate.epoch.env.science": 0.36363636363636365, "success_rate.epoch.env_macro_mean": 0.82997002997003, "success_rate.epoch.global": 0.725, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9979813664596273, "tokens_p.mean_in_band": 0.5777698863636364, "tokens_rate.above_band": 0.973397823458283, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026602176541717048 }, { "epoch": 0.005325948018747337, "grad_norm": 134.87519657802602, "learning_rate": 2.553191489361702e-08, "loss": 0.8741, "step": 25, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8, "success_rate.epoch.env.science": 0.5, "success_rate.epoch.env_macro_mean": 0.8285714285714285, "success_rate.epoch.global": 0.7, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9990748355263158, "tokens_p.mean_in_band": 0.5151827830188679, "tokens_rate.above_band": 0.9198184568835098, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08018154311649017 }, { "epoch": 0.006391137622496805, "grad_norm": 146.47774240723183, "learning_rate": 3.085106382978723e-08, "loss": 0.7981, "step": 30, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8125, "success_rate.epoch.env.science": 0.48, "success_rate.epoch.env_macro_mean": 0.8490625, "success_rate.epoch.global": 0.6833333333333333, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9987949871465296, "tokens_p.mean_in_band": 0.38421875, "tokens_rate.above_band": 0.9396135265700483, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06038647342995169 }, { "epoch": 0.007456327226246272, "grad_norm": 164.69253341969946, "learning_rate": 3.617021276595745e-08, "loss": 0.7385, "step": 35, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8421052631578947, "success_rate.epoch.env.science": 0.5172413793103449, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8732607380520265, "success_rate.epoch.global": 0.7142857142857143, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988586956521739, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.979557069846678, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020442930153321975 }, { "epoch": 0.008521516829995739, "grad_norm": 123.14828386632749, "learning_rate": 4.1489361702127654e-08, "loss": 0.7603, "step": 40, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.42857142857142855, "success_rate.epoch.env.math": 0.8333333333333334, "success_rate.epoch.env.science": 0.45454545454545453, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8573833573833574, "success_rate.epoch.global": 0.675, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.26666666666666666, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.993560606060606, "tokens_p.mean_in_band": 0.516953125, "tokens_rate.above_band": 0.7674418604651163, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.23255813953488372 }, { "epoch": 0.009586706433745207, "grad_norm": 101.25030838586852, "learning_rate": 4.680851063829787e-08, "loss": 0.7417, "step": 45, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.42857142857142855, "success_rate.epoch.env.math": 0.8275862068965517, "success_rate.epoch.env.science": 0.4864864864864865, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860293791328274, "success_rate.epoch.global": 0.6888888888888889, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995207055214724, "tokens_p.mean_in_band": 0.41552734375, "tokens_rate.above_band": 0.9760479041916168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023952095808383235 }, { "epoch": 0.010651896037494673, "grad_norm": 185.78207769109625, "learning_rate": 5.2127659574468084e-08, "loss": 0.7104, "step": 50, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.42857142857142855, "success_rate.epoch.env.math": 0.84375, "success_rate.epoch.env.science": 0.46153846153846156, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8733859890109891, "success_rate.epoch.global": 0.7, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963815789473685, "tokens_p.mean_in_band": 0.4185267857142857, "tokens_rate.above_band": 0.9895833333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010416666666666666 }, { "epoch": 0.011717085641244141, "grad_norm": 220.60635659314818, "learning_rate": 5.74468085106383e-08, "loss": 0.813, "step": 55, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.8378378378378378, "success_rate.epoch.env.science": 0.43902439024390244, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8832417783637296, "success_rate.epoch.global": 0.7, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9987009237875288, "tokens_p.mean_in_band": 0.5329241071428571, "tokens_rate.above_band": 0.9686800894854586, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03131991051454139 }, { "epoch": 0.01278227524499361, "grad_norm": 280.9108909418842, "learning_rate": 6.27659574468085e-08, "loss": 0.8872, "step": 60, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5454545454545454, "success_rate.epoch.env.math": 0.8461538461538461, "success_rate.epoch.env.science": 0.45652173913043476, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8848130130738827, "success_rate.epoch.global": 0.7, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.775, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9949263649425287, "tokens_p.mean_in_band": 0.4735243055555556, "tokens_rate.above_band": 0.9508196721311475, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04918032786885246 }, { "epoch": 0.013847464848743076, "grad_norm": 122.85610672798597, "learning_rate": 6.808510638297871e-08, "loss": 0.7545, "step": 65, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9166666666666666, "success_rate.epoch.env.logic": 0.5454545454545454, "success_rate.epoch.env.math": 0.85, "success_rate.epoch.env.science": 0.47058823529411764, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.878270944741533, "success_rate.epoch.global": 0.7, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.775, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0001908396946564, "tokens_p.mean_in_band": 0.494140625, "tokens_rate.above_band": 0.9899244332493703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010075566750629723 }, { "epoch": 0.014912654452492544, "grad_norm": 81.61402105739417, "learning_rate": 7.340425531914894e-08, "loss": 0.7344, "step": 70, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9230769230769231, "success_rate.epoch.env.logic": 0.46153846153846156, "success_rate.epoch.env.math": 0.8666666666666667, "success_rate.epoch.env.science": 0.46153846153846156, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712820512820514, "success_rate.epoch.global": 0.7, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975978490832158, "tokens_p.mean_in_band": 0.47662259615384617, "tokens_rate.above_band": 0.9561699258260283, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04383007417397168 }, { "epoch": 0.015977844056242013, "grad_norm": 153.62559665029062, "learning_rate": 7.872340425531915e-08, "loss": 0.5339, "step": 75, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5333333333333333, "success_rate.epoch.env.math": 0.8723404255319149, "success_rate.epoch.env.science": 0.49122807017543857, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8825473257612115, "success_rate.epoch.global": 0.7133333333333334, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967935667752443, "tokens_p.mean_in_band": 0.4296875, "tokens_rate.above_band": 0.9699842022116903, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030015797788309637 }, { "epoch": 0.017043033659991477, "grad_norm": 171.79187123368592, "learning_rate": 8.404255319148936e-08, "loss": 0.6487, "step": 80, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5625, "success_rate.epoch.env.math": 0.8846153846153846, "success_rate.epoch.env.science": 0.4915254237288136, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8867212236915627, "success_rate.epoch.global": 0.725, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998422712933754, "tokens_p.mean_in_band": 0.701904296875, "tokens_rate.above_band": 0.9814241486068112, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018575851393188854 }, { "epoch": 0.018108223263740945, "grad_norm": 78.69450458066295, "learning_rate": 8.936170212765957e-08, "loss": 0.382, "step": 85, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.8928571428571429, "success_rate.epoch.env.science": 0.5079365079365079, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8884920634920637, "success_rate.epoch.global": 0.7294117647058823, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967077759197325, "tokens_p.mean_in_band": 0.49402573529411764, "tokens_rate.above_band": 0.9462025316455697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05379746835443038 }, { "epoch": 0.019173412867490414, "grad_norm": 117.32732108960155, "learning_rate": 9.468085106382978e-08, "loss": 0.5551, "step": 90, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9016393442622951, "success_rate.epoch.env.science": 0.5076923076923077, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8937903080526031, "success_rate.epoch.global": 0.7388888888888889, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963165399239544, "tokens_p.mean_in_band": 0.63046875, "tokens_rate.above_band": 0.9813432835820896, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018656716417910446 }, { "epoch": 0.020238602471239882, "grad_norm": 125.13625422096041, "learning_rate": 1e-07, "loss": 0.4718, "step": 95, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6363636363636364, "success_rate.epoch.env.math": 0.9047619047619048, "success_rate.epoch.env.science": 0.5223880597014925, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8992085029398462, "success_rate.epoch.global": 0.7526315789473684, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9998063816604709, "tokens_p.mean_in_band": 0.639453125, "tokens_rate.above_band": 0.9938423645320197, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006157635467980296 }, { "epoch": 0.021303792074989347, "grad_norm": 191.69681285035233, "learning_rate": 1.053191489361702e-07, "loss": 0.5207, "step": 100, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6666666666666666, "success_rate.epoch.env.math": 0.9090909090909091, "success_rate.epoch.env.science": 0.5285714285714286, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9032900432900434, "success_rate.epoch.global": 0.76, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964622641509434, "tokens_p.mean_in_band": 0.5245535714285714, "tokens_rate.above_band": 0.9814814814814815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018518518518518517 }, { "epoch": 0.022368981678738815, "grad_norm": 334.618707129673, "learning_rate": 1.1063829787234042e-07, "loss": 0.6853, "step": 105, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.68, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.science": 0.5333333333333333, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9041904761904762, "success_rate.epoch.global": 0.7571428571428571, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.7833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9907094594594594, "tokens_p.mean_in_band": 0.3910590277777778, "tokens_rate.above_band": 0.8043478260869565, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1956521739130435 }, { "epoch": 0.023434171282488283, "grad_norm": 223.77307793873968, "learning_rate": 1.1595744680851063e-07, "loss": 0.6475, "step": 110, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.7037037037037037, "success_rate.epoch.env.math": 0.9066666666666666, "success_rate.epoch.env.science": 0.5256410256410257, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9064582824582825, "success_rate.epoch.global": 0.759090909090909, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939903846153846, "tokens_p.mean_in_band": 0.5901988636363636, "tokens_rate.above_band": 0.8764044943820225, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12359550561797752 }, { "epoch": 0.02449936088623775, "grad_norm": 99.52150128216505, "learning_rate": 1.2127659574468084e-07, "loss": 0.5094, "step": 115, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.7142857142857143, "success_rate.epoch.env.math": 0.9113924050632911, "success_rate.epoch.env.science": 0.525, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9079249547920434, "success_rate.epoch.global": 0.7641921397379913, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9977941176470588, "tokens_p.mean_in_band": 0.373046875, "tokens_rate.above_band": 0.9550561797752809, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0449438202247191 }, { "epoch": 0.02556455048998722, "grad_norm": 121.46142275404097, "learning_rate": 1.2659574468085107e-07, "loss": 0.759, "step": 120, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.7142857142857143, "success_rate.epoch.env.math": 0.9024390243902439, "success_rate.epoch.env.science": 0.5301204819277109, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9080178553937003, "success_rate.epoch.global": 0.7656903765690377, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973714953271028, "tokens_p.mean_in_band": 0.6136067708333334, "tokens_rate.above_band": 0.963963963963964, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036036036036036036 }, { "epoch": 0.026629740093736684, "grad_norm": 38.88557519636338, "learning_rate": 1.3191489361702127e-07, "loss": 0.3261, "step": 125, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.6896551724137931, "success_rate.epoch.env.math": 0.9058823529411765, "success_rate.epoch.env.science": 0.5232558139534884, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9056293339308457, "success_rate.epoch.global": 0.7630522088353414, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976340694006309, "tokens_p.mean_in_band": 0.6704963235294118, "tokens_rate.above_band": 0.9738863287250384, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026113671274961597 }, { "epoch": 0.027694929697486152, "grad_norm": 660.6599264924243, "learning_rate": 1.372340425531915e-07, "loss": 0.6337, "step": 130, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.7096774193548387, "success_rate.epoch.env.math": 0.8953488372093024, "success_rate.epoch.env.science": 0.5393258426966292, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9088796543705214, "success_rate.epoch.global": 0.7683397683397684, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999388966480447, "tokens_p.mean_in_band": 0.637451171875, "tokens_rate.above_band": 0.9944444444444445, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005555555555555556 }, { "epoch": 0.02876011930123562, "grad_norm": 200.43701752538294, "learning_rate": 1.425531914893617e-07, "loss": 0.6314, "step": 135, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.71875, "success_rate.epoch.env.math": 0.896551724137931, "success_rate.epoch.env.science": 0.5416666666666666, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9101412835249043, "success_rate.epoch.global": 0.7657992565055762, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.8928571428571428, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9987012987012988, "tokens_p.mean_in_band": 0.4561244419642857, "tokens_rate.above_band": 0.9649122807017544, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03508771929824561 }, { "epoch": 0.02982530890498509, "grad_norm": 79.87488421431745, "learning_rate": 1.4787234042553191e-07, "loss": 0.4928, "step": 140, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7352941176470589, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.science": 0.54, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.912266253869969, "success_rate.epoch.global": 0.7670250896057348, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975, "tokens_p.mean_in_band": 0.5002297794117647, "tokens_rate.above_band": 0.989321608040201, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010678391959798994 }, { "epoch": 0.030890498508734553, "grad_norm": 141.13739354995624, "learning_rate": 1.531914893617021e-07, "loss": 0.5645, "step": 145, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7352941176470589, "success_rate.epoch.env.math": 0.9032258064516129, "success_rate.epoch.env.science": 0.5514018691588785, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9137290214310182, "success_rate.epoch.global": 0.7681660899653979, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9900265957446809, "tokens_p.mean_in_band": 0.49947916666666664, "tokens_rate.above_band": 0.8623853211009175, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13761467889908258 }, { "epoch": 0.031955688112484025, "grad_norm": 168.0282207866582, "learning_rate": 1.585106382978723e-07, "loss": 0.3307, "step": 150, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7567567567567568, "success_rate.epoch.env.math": 0.9072164948453608, "success_rate.epoch.env.science": 0.5412844036697247, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9152626076324474, "success_rate.epoch.global": 0.7692307692307693, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968354430379747, "tokens_p.mean_in_band": 0.3268229166666667, "tokens_rate.above_band": 0.8977272727272727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10227272727272728 }, { "epoch": 0.033020877716233486, "grad_norm": 242.28975794793152, "learning_rate": 1.6382978723404256e-07, "loss": 0.5423, "step": 155, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7631578947368421, "success_rate.epoch.env.math": 0.9090909090909091, "success_rate.epoch.env.science": 0.5486725663716814, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9168289791252064, "success_rate.epoch.global": 0.7734627831715211, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9932369402985075, "tokens_p.mean_in_band": 0.4778645833333333, "tokens_rate.above_band": 0.9370629370629371, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06293706293706294 }, { "epoch": 0.034086067319982954, "grad_norm": 98.24957404326659, "learning_rate": 1.6914893617021276e-07, "loss": 0.5934, "step": 160, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7692307692307693, "success_rate.epoch.env.math": 0.9117647058823529, "success_rate.epoch.env.science": 0.559322033898305, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.918768593006406, "success_rate.epoch.global": 0.7774294670846394, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982404279279279, "tokens_p.mean_in_band": 0.4409722222222222, "tokens_rate.above_band": 0.9801324503311258, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019867549668874173 }, { "epoch": 0.03515125692373242, "grad_norm": 207.29874757978172, "learning_rate": 1.7446808510638299e-07, "loss": 0.5853, "step": 165, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.7804878048780488, "success_rate.epoch.env.math": 0.9150943396226415, "success_rate.epoch.env.science": 0.5702479338842975, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9215830078384988, "success_rate.epoch.global": 0.78419452887538, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984124331550802, "tokens_p.mean_in_band": 0.806640625, "tokens_rate.above_band": 0.9946808510638298, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005319148936170213 }, { "epoch": 0.03621644652748189, "grad_norm": 215.72872373681392, "learning_rate": 1.7978723404255318e-07, "loss": 0.5016, "step": 170, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.7954545454545454, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.science": 0.5772357723577236, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9222690317812269, "success_rate.epoch.global": 0.7846607669616519, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9991619809688581, "tokens_p.mean_in_band": 0.5600961538461539, "tokens_rate.above_band": 0.9780033840947546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021996615905245348 }, { "epoch": 0.03728163613123136, "grad_norm": 88.1798383756124, "learning_rate": 1.8510638297872338e-07, "loss": 0.4687, "step": 175, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.7954545454545454, "success_rate.epoch.env.math": 0.9017857142857143, "success_rate.epoch.env.science": 0.578125, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9225365259740259, "success_rate.epoch.global": 0.7851002865329513, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972988077496274, "tokens_p.mean_in_band": 0.6089409722222222, "tokens_rate.above_band": 0.986764705882353, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013235294117647059 }, { "epoch": 0.03834682573498083, "grad_norm": 117.02852695568869, "learning_rate": 1.904255319148936e-07, "loss": 0.3779, "step": 180, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.7777777777777778, "success_rate.epoch.env.math": 0.9051724137931034, "success_rate.epoch.env.science": 0.5725190839694656, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9207850227921298, "success_rate.epoch.global": 0.7827298050139275, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9985414866032843, "tokens_p.mean_in_band": 0.544016768292683, "tokens_rate.above_band": 0.9657762938230384, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034223706176961605 }, { "epoch": 0.039412015338730295, "grad_norm": 362.9432453883575, "learning_rate": 1.957446808510638e-07, "loss": 0.5635, "step": 185, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.7872340425531915, "success_rate.epoch.env.math": 0.9067796610169492, "success_rate.epoch.env.science": 0.5808823529411765, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9229441511056772, "success_rate.epoch.global": 0.7859078590785907, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973169191919192, "tokens_p.mean_in_band": 0.7493489583333334, "tokens_rate.above_band": 0.9565217391304348, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043478260869565216 }, { "epoch": 0.040477204942479764, "grad_norm": 263.98176650817317, "learning_rate": 2.0106382978723403e-07, "loss": 0.5884, "step": 190, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8, "success_rate.epoch.env.math": 0.907563025210084, "success_rate.epoch.env.science": 0.574468085106383, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9236576564861922, "success_rate.epoch.global": 0.783641160949868, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983277591973244, "tokens_p.mean_in_band": 0.4421875, "tokens_rate.above_band": 0.9522292993630573, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04777070063694268 }, { "epoch": 0.04154239454622923, "grad_norm": 99.82716689138132, "learning_rate": 2.0638297872340426e-07, "loss": 0.5266, "step": 195, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.803921568627451, "success_rate.epoch.env.math": 0.9090909090909091, "success_rate.epoch.env.science": 0.5684931506849316, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9236051082948746, "success_rate.epoch.global": 0.781491002570694, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.8800000000000001, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971910112359551, "tokens_p.mean_in_band": 0.5352076480263158, "tokens_rate.above_band": 0.9035532994923858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09644670050761421 }, { "epoch": 0.04260758414997869, "grad_norm": 477.27558166201226, "learning_rate": 2.1170212765957448e-07, "loss": 0.4063, "step": 200, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8076923076923077, "success_rate.epoch.env.math": 0.912, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.5666666666666667, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9309913117185844, "success_rate.epoch.global": 0.7819548872180451, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9920910493827161, "tokens_p.mean_below_band": 8.585629984736443e-10, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9050279329608939, "tokens_rate.below_band": 0.00558659217877095, "tokens_rate.in_band": 0.0893854748603352 }, { "epoch": 0.04367277375372816, "grad_norm": 143.40155203496522, "learning_rate": 2.1702127659574465e-07, "loss": 0.471, "step": 205, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8113207547169812, "success_rate.epoch.env.math": 0.9069767441860465, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.577922077922078, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9320673923595945, "success_rate.epoch.global": 0.784841075794621, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952986725663717, "tokens_p.mean_in_band": 0.6884765625, "tokens_rate.above_band": 0.904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.096 }, { "epoch": 0.04473796335747763, "grad_norm": 616.1292866345375, "learning_rate": 2.2234042553191488e-07, "loss": 0.7285, "step": 210, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8, "success_rate.epoch.env.math": 0.9083969465648855, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.5786163522012578, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.931230457990598, "success_rate.epoch.global": 0.7828162291169452, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.775, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.999496644295302, "tokens_p.mean_in_band": 0.5057444852941176, "tokens_rate.above_band": 0.9563543003851092, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043645699614890884 }, { "epoch": 0.0458031529612271, "grad_norm": 386.42059521215043, "learning_rate": 2.276595744680851e-07, "loss": 0.5514, "step": 215, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.96, "success_rate.epoch.env.logic": 0.7894736842105263, "success_rate.epoch.env.math": 0.9097744360902256, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.5766871165644172, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9305395669877427, "success_rate.epoch.global": 0.7808857808857809, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971532091097308, "tokens_p.mean_in_band": 0.634375, "tokens_rate.above_band": 0.9698795180722891, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030120481927710843 }, { "epoch": 0.046868342564976566, "grad_norm": 145.68890202002504, "learning_rate": 2.3297872340425533e-07, "loss": 0.4387, "step": 220, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.96, "success_rate.epoch.env.logic": 0.7903225806451613, "success_rate.epoch.env.math": 0.9104477611940298, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.5843373493975904, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9313734264760711, "success_rate.epoch.global": 0.7835990888382688, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.8, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998688352570829, "tokens_p.mean_in_band": 0.49254261363636365, "tokens_rate.above_band": 0.9665314401622718, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033468559837728194 }, { "epoch": 0.047933532168726034, "grad_norm": 71.69789068314002, "learning_rate": 2.382978723404255e-07, "loss": 0.4261, "step": 225, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.796875, "success_rate.epoch.env.math": 0.9117647058823529, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.5808383233532934, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9320400901998735, "success_rate.epoch.global": 0.7861915367483296, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977207293666027, "tokens_p.mean_in_band": 0.6015625, "tokens_rate.above_band": 0.9961759082217974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0038240917782026767 }, { "epoch": 0.0489987217724755, "grad_norm": 52.732008175907225, "learning_rate": 2.4361702127659575e-07, "loss": 0.5333, "step": 230, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8, "success_rate.epoch.env.math": 0.9136690647482014, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.5798816568047337, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8869557895014453, "success_rate.epoch.global": 0.7864923747276689, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9962565104166666, "tokens_p.mean_in_band": 0.501171875, "tokens_rate.above_band": 0.9056603773584906, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09433962264150944 }, { "epoch": 0.05006391137622497, "grad_norm": 173.93139277894971, "learning_rate": 2.48936170212766e-07, "loss": 0.5164, "step": 235, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.803030303030303, "success_rate.epoch.env.math": 0.916083916083916, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.5862068965517241, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8880258253299005, "success_rate.epoch.global": 0.7889125799573561, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9901889534883721, "tokens_p.mean_in_band": 0.5385044642857143, "tokens_rate.above_band": 0.9247311827956989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07526881720430108 }, { "epoch": 0.05112910097997444, "grad_norm": 101.93574906798119, "learning_rate": 2.5425531914893615e-07, "loss": 0.421, "step": 240, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8059701492537313, "success_rate.epoch.env.math": 0.9183673469387755, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.5977653631284916, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.889551438389451, "success_rate.epoch.global": 0.7933194154488518, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9941620879120879, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.0521942905837239, "grad_norm": 73.72945167563957, "learning_rate": 2.5957446808510637e-07, "loss": 0.431, "step": 245, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8088235294117647, "success_rate.epoch.env.math": 0.9194630872483222, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6010928961748634, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8902129523452648, "success_rate.epoch.global": 0.7955010224948875, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974038461538461, "tokens_p.mean_in_band": 0.6399739583333334, "tokens_rate.above_band": 0.9643916913946587, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03560830860534125 }, { "epoch": 0.05325948018747337, "grad_norm": 85.91349736103393, "learning_rate": 2.648936170212766e-07, "loss": 0.4619, "step": 250, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8115942028985508, "success_rate.epoch.env.math": 0.9144736842105263, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6063829787234043, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8904921662541313, "success_rate.epoch.global": 0.7955911823647295, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9919084821428571, "tokens_p.mean_in_band": 0.5946514423076923, "tokens_rate.above_band": 0.896, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.104 }, { "epoch": 0.054324669791222836, "grad_norm": 286.98707907761883, "learning_rate": 2.7021276595744677e-07, "loss": 0.2553, "step": 255, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8169014084507042, "success_rate.epoch.env.math": 0.9161290322580645, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6020942408376964, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.890735240409948, "success_rate.epoch.global": 0.7956777996070727, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969723183391004, "tokens_p.mean_in_band": 0.5823863636363636, "tokens_rate.above_band": 0.9633333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03666666666666667 }, { "epoch": 0.055389859394972304, "grad_norm": 86.96736797423058, "learning_rate": 2.75531914893617e-07, "loss": 0.3657, "step": 260, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8194444444444444, "success_rate.epoch.env.math": 0.9171974522292994, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6071428571428571, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8763710045557179, "success_rate.epoch.global": 0.7957610789980732, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967447916666666, "tokens_p.mean_in_band": 0.6536959134615384, "tokens_rate.above_band": 0.8470588235294118, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15294117647058825 }, { "epoch": 0.05645504899872177, "grad_norm": 137.87840566102082, "learning_rate": 2.808510638297872e-07, "loss": 0.5173, "step": 265, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9642857142857143, "success_rate.epoch.env.logic": 0.821917808219178, "success_rate.epoch.env.math": 0.9182389937106918, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6119402985074627, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8772469225505801, "success_rate.epoch.global": 0.7977315689981096, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943364395886889, "tokens_p.mean_in_band": 0.7736280487804879, "tokens_rate.above_band": 0.9499389499389499, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050061050061050064 }, { "epoch": 0.05752023860247124, "grad_norm": 283.6396316509124, "learning_rate": 2.8617021276595744e-07, "loss": 0.4702, "step": 270, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9655172413793104, "success_rate.epoch.env.logic": 0.8243243243243243, "success_rate.epoch.env.math": 0.9125, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6201923076923077, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8778061097026615, "success_rate.epoch.global": 0.7977736549165121, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969907407407408, "tokens_p.mean_in_band": 0.58447265625, "tokens_rate.above_band": 0.9507042253521126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04929577464788732 }, { "epoch": 0.05858542820622071, "grad_norm": 225.6895500803243, "learning_rate": 2.914893617021276e-07, "loss": 0.3096, "step": 275, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9655172413793104, "success_rate.epoch.env.logic": 0.8243243243243243, "success_rate.epoch.env.math": 0.9146341463414634, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6255924170616114, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8784910420400039, "success_rate.epoch.global": 0.8014571948998178, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970737632508834, "tokens_p.mean_in_band": 0.8645833333333334, "tokens_rate.above_band": 0.9947275922671354, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005272407732864675 }, { "epoch": 0.05965061780997018, "grad_norm": 107.61547173808319, "learning_rate": 2.9680851063829784e-07, "loss": 0.5635, "step": 280, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9655172413793104, "success_rate.epoch.env.logic": 0.8243243243243243, "success_rate.epoch.env.math": 0.9156626506024096, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6267281105990783, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8786877872944051, "success_rate.epoch.global": 0.8010752688172043, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.9954252577319588, "tokens_p.mean_in_band": 0.5911458333333334, "tokens_rate.above_band": 0.9642147117296223, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03578528827037773 }, { "epoch": 0.060715807413719645, "grad_norm": 79.62456069953544, "learning_rate": 3.0212765957446807e-07, "loss": 0.3437, "step": 285, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9666666666666667, "success_rate.epoch.env.logic": 0.8289473684210527, "success_rate.epoch.env.math": 0.9161676646706587, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6306306306306306, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8796132421565767, "success_rate.epoch.global": 0.8028169014084507, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980742296918768, "tokens_p.mean_in_band": 0.6998697916666666, "tokens_rate.above_band": 0.967479674796748, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032520325203252036 }, { "epoch": 0.06178099701746911, "grad_norm": 184.29277848775067, "learning_rate": 3.074468085106383e-07, "loss": 0.4092, "step": 290, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.967741935483871, "success_rate.epoch.env.logic": 0.8289473684210527, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6327433628318584, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8799484242487984, "success_rate.epoch.global": 0.8044982698961938, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945987654320988, "tokens_p.mean_in_band": 0.490234375, "tokens_rate.above_band": 0.9878048780487805, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012195121951219513 }, { "epoch": 0.06284618662121857, "grad_norm": 192.1485208245573, "learning_rate": 3.127659574468085e-07, "loss": 0.5063, "step": 295, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.967741935483871, "success_rate.epoch.env.logic": 0.8289473684210527, "success_rate.epoch.env.math": 0.9181286549707602, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6320346320346321, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8800169022039682, "success_rate.epoch.global": 0.8044217687074829, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971289752650176, "tokens_p.mean_in_band": 0.51220703125, "tokens_rate.above_band": 0.9725085910652921, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027491408934707903 }, { "epoch": 0.06391137622496805, "grad_norm": 53.717681249975506, "learning_rate": 3.1808510638297874e-07, "loss": 0.3581, "step": 300, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8311688311688312, "success_rate.epoch.env.math": 0.9186046511627907, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6329113924050633, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8775925643700018, "success_rate.epoch.global": 0.802675585284281, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9890836408364083, "tokens_p.mean_below_band": 7.338821887969971e-07, "tokens_p.mean_in_band": 0.4773232677902622, "tokens_rate.above_band": 0.7520814061054579, "tokens_rate.below_band": 0.0009250693802035153, "tokens_rate.in_band": 0.24699352451433856 }, { "epoch": 0.06497656582871751, "grad_norm": 105.27737944046548, "learning_rate": 3.234042553191489e-07, "loss": 0.2076, "step": 305, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8333333333333334, "success_rate.epoch.env.math": 0.92, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6419753086419753, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8787401795735129, "success_rate.epoch.global": 0.805921052631579, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.990320796460177, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9912280701754386, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008771929824561403 }, { "epoch": 0.06604175543246697, "grad_norm": 128.20518201903698, "learning_rate": 3.2872340425531914e-07, "loss": 0.3951, "step": 310, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8375, "success_rate.epoch.env.math": 0.9204545454545454, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6506024096385542, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8799445716751304, "success_rate.epoch.global": 0.8090614886731392, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965659340659341, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.9680851063829787, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031914893617021274 }, { "epoch": 0.06710694503621645, "grad_norm": 111.42614963644284, "learning_rate": 3.3404255319148936e-07, "loss": 0.198, "step": 315, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8414634146341463, "success_rate.epoch.env.math": 0.9213483146067416, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6535433070866141, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8806534881509851, "success_rate.epoch.global": 0.810207336523126, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9887242268041238, "tokens_p.mean_in_band": 0.55810546875, "tokens_rate.above_band": 0.9238095238095239, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0761904761904762 }, { "epoch": 0.06817213463996591, "grad_norm": 80.68678770558238, "learning_rate": 3.393617021276596e-07, "loss": 0.3028, "step": 320, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8414634146341463, "success_rate.epoch.env.math": 0.9243243243243243, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.65234375, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8808149838447096, "success_rate.epoch.global": 0.8116169544740973, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99625, "tokens_p.mean_in_band": 0.4539930555555556, "tokens_rate.above_band": 0.9433962264150944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05660377358490566 }, { "epoch": 0.06923732424371538, "grad_norm": 212.6150375396718, "learning_rate": 3.4468085106382976e-07, "loss": 0.5615, "step": 325, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8452380952380952, "success_rate.epoch.env.math": 0.9251336898395722, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6538461538461539, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8813682974779232, "success_rate.epoch.global": 0.8129829984544049, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959656762295082, "tokens_p.mean_in_band": 0.54296875, "tokens_rate.above_band": 0.9838709677419355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016129032258064516 }, { "epoch": 0.07030251384746485, "grad_norm": 274.6090402190955, "learning_rate": 3.5e-07, "loss": 0.3874, "step": 330, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8470588235294118, "success_rate.epoch.env.math": 0.9259259259259259, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6566037735849056, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8818565323975979, "success_rate.epoch.global": 0.8143074581430746, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955538617886179, "tokens_p.mean_in_band": 0.5929129464285714, "tokens_rate.above_band": 0.9213483146067416, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07865168539325842 }, { "epoch": 0.07136770345121432, "grad_norm": 78.24440459832996, "learning_rate": 3.553191489361702e-07, "loss": 0.407, "step": 335, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8409090909090909, "success_rate.epoch.env.math": 0.921875, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.654275092936803, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8807175015617479, "success_rate.epoch.global": 0.8110944527736131, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.611111111111111, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9953568447412354, "tokens_p.mean_in_band": 0.4287405303030303, "tokens_rate.above_band": 0.9477848101265823, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05221518987341772 }, { "epoch": 0.07243289305496378, "grad_norm": 78.54678932890421, "learning_rate": 3.606382978723404e-07, "loss": 0.2786, "step": 340, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8478260869565217, "success_rate.epoch.env.math": 0.9226804123711341, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6568265682656826, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8816514909933338, "success_rate.epoch.global": 0.8136094674556213, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973739495798319, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9916666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008333333333333333 }, { "epoch": 0.07349808265871326, "grad_norm": 270.3106115154008, "learning_rate": 3.659574468085106e-07, "loss": 0.442, "step": 345, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.8494623655913979, "success_rate.epoch.env.math": 0.9242424242424242, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.656934306569343, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8821242153754945, "success_rate.epoch.global": 0.814868804664723, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949324324324325, "tokens_p.mean_in_band": 0.6143973214285714, "tokens_rate.above_band": 0.9814323607427056, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01856763925729443 }, { "epoch": 0.07456327226246272, "grad_norm": 47.5508422766998, "learning_rate": 3.7127659574468083e-07, "loss": 0.338, "step": 350, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.851063829787234, "success_rate.epoch.env.math": 0.925, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6571428571428571, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8823576326961241, "success_rate.epoch.global": 0.8146551724137931, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967161016949152, "tokens_p.mean_in_band": 0.6422991071428571, "tokens_rate.above_band": 0.9546925566343042, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045307443365695796 }, { "epoch": 0.07562846186621218, "grad_norm": 112.27176807594125, "learning_rate": 3.7659574468085106e-07, "loss": 0.3081, "step": 355, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8526315789473684, "success_rate.epoch.env.math": 0.9261083743842364, "success_rate.epoch.env.sat": 0.25, "success_rate.epoch.env.science": 0.6584507042253521, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8753061025586539, "success_rate.epoch.global": 0.8144475920679887, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988411125319693, "tokens_p.mean_in_band": 0.6462296195652174, "tokens_rate.above_band": 0.9714285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02857142857142857 }, { "epoch": 0.07669365146996165, "grad_norm": 72.5725592936309, "learning_rate": 3.8191489361702123e-07, "loss": 0.2553, "step": 360, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8556701030927835, "success_rate.epoch.env.math": 0.9261083743842364, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6608996539792388, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712595092767721, "success_rate.epoch.global": 0.8142458100558659, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954044117647058, "tokens_p.mean_in_band": 0.6996783088235294, "tokens_rate.above_band": 0.9230769230769231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07692307692307693 }, { "epoch": 0.07775884107371112, "grad_norm": 76.05177626656429, "learning_rate": 3.8723404255319145e-07, "loss": 0.2523, "step": 365, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8556701030927835, "success_rate.epoch.env.math": 0.9271844660194175, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6632302405498282, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718662958278611, "success_rate.epoch.global": 0.8168044077134986, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946409189580319, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9978339350180505, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0021660649819494585 }, { "epoch": 0.07882403067746059, "grad_norm": 38.66044484751237, "learning_rate": 3.925531914893617e-07, "loss": 0.1606, "step": 370, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8571428571428571, "success_rate.epoch.env.math": 0.9282296650717703, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6621160409556314, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8719939097831548, "success_rate.epoch.global": 0.8179347826086957, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984452736318408, "tokens_p.mean_in_band": 0.4765625, "tokens_rate.above_band": 0.9901477832512315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009852216748768473 }, { "epoch": 0.07988922028121005, "grad_norm": 31.630314299338544, "learning_rate": 3.978723404255319e-07, "loss": 0.3096, "step": 375, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8571428571428571, "success_rate.epoch.env.math": 0.9289099526066351, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6644067796610169, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8725298191330126, "success_rate.epoch.global": 0.8203753351206434, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994218910585817, "tokens_p.mean_in_band": 0.806640625, "tokens_rate.above_band": 0.9959058341862845, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0040941658137154556 }, { "epoch": 0.08095440988495953, "grad_norm": 133.9275449390992, "learning_rate": 3.9999999475269154e-07, "loss": 0.4971, "step": 380, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.85, "success_rate.epoch.env.math": 0.9299065420560748, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6666666666666666, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8721765117977611, "success_rate.epoch.global": 0.8201058201058201, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9920164233576643, "tokens_p.mean_in_band": 0.4291294642857143, "tokens_rate.above_band": 0.9513888888888888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04861111111111111 }, { "epoch": 0.08201959948870899, "grad_norm": 70.91299771339187, "learning_rate": 3.9999996268581145e-07, "loss": 0.3012, "step": 385, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8543689320388349, "success_rate.epoch.env.math": 0.9308755760368663, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6666666666666666, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8726617814359091, "success_rate.epoch.global": 0.8211488250652742, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980053191489362, "tokens_p.mean_in_band": 0.42578125, "tokens_rate.above_band": 0.9791666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020833333333333332 }, { "epoch": 0.08308478909245846, "grad_norm": 196.63393882497718, "learning_rate": 3.99999901467246e-07, "loss": 0.3086, "step": 390, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8543689320388349, "success_rate.epoch.env.math": 0.9324324324324325, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6644951140065146, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8726058999573102, "success_rate.epoch.global": 0.8208762886597938, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9915780141843972, "tokens_p.mean_in_band": 0.5732421875, "tokens_rate.above_band": 0.9463087248322147, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053691275167785234 }, { "epoch": 0.08414997869620792, "grad_norm": 54.23492780550172, "learning_rate": 3.9999981109703984e-07, "loss": 0.3664, "step": 395, "success_rate.epoch.env.abd": 0.9767441860465116, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8557692307692307, "success_rate.epoch.env.math": 0.9288888888888889, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.667741935483871, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707147445369501, "success_rate.epoch.global": 0.8206106870229007, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968155095184771, "tokens_p.mean_below_band": 3.625949223836263e-07, "tokens_p.mean_in_band": 0.10156926114236262, "tokens_rate.above_band": 0.2779333955804544, "tokens_rate.below_band": 0.0028011204481792717, "tokens_rate.in_band": 0.7192654839713664 }, { "epoch": 0.08521516829995739, "grad_norm": 79.04932777844998, "learning_rate": 3.999996915752588e-07, "loss": 0.2101, "step": 400, "success_rate.epoch.env.abd": 0.9767441860465116, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8571428571428571, "success_rate.epoch.env.math": 0.9298245614035088, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6719745222929936, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.868279158388226, "success_rate.epoch.global": 0.821608040201005, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981986215538847, "tokens_p.mean_in_band": 0.7042100694444444, "tokens_rate.above_band": 0.9568345323741008, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04316546762589928 }, { "epoch": 0.08628035790370686, "grad_norm": 151.24303411741707, "learning_rate": 3.9999954290198994e-07, "loss": 0.4614, "step": 405, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8611111111111112, "success_rate.epoch.env.math": 0.9301310043668122, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6729559748427673, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688050393616393, "success_rate.epoch.global": 0.8225806451612904, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983974358974359, "tokens_p.mean_in_band": 0.716015625, "tokens_rate.above_band": 0.9722991689750693, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027700831024930747 }, { "epoch": 0.08734554750745632, "grad_norm": 83.87010578776845, "learning_rate": 3.999993650773417e-07, "loss": 0.5082, "step": 410, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8611111111111112, "success_rate.epoch.env.math": 0.9313304721030042, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6718266253869969, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688114137507685, "success_rate.epoch.global": 0.8223039215686274, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968297101449275, "tokens_p.mean_in_band": 0.5022321428571429, "tokens_rate.above_band": 0.9672897196261683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03271028037383177 }, { "epoch": 0.0884107371112058, "grad_norm": 158.2557486788325, "learning_rate": 3.999991581014437e-07, "loss": 0.3502, "step": 415, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 0.9565217391304348, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8623853211009175, "success_rate.epoch.env.math": 0.9279661016949152, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6717791411042945, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8646645132443549, "success_rate.epoch.global": 0.8208232445520581, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982092696629213, "tokens_p.mean_in_band": 0.599365234375, "tokens_rate.above_band": 0.9910913140311804, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008908685968819599 }, { "epoch": 0.08947592671495526, "grad_norm": 89.24530838881262, "learning_rate": 3.9999892197444665e-07, "loss": 0.3589, "step": 420, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 0.9583333333333334, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8623853211009175, "success_rate.epoch.env.math": 0.9288702928870293, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6707317073170732, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649327316888861, "success_rate.epoch.global": 0.8217703349282297, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998679577464789, "tokens_p.mean_in_band": 0.359375, "tokens_rate.above_band": 0.9989447766443897, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001055223355610271 }, { "epoch": 0.09054111631870473, "grad_norm": 774.5868925201706, "learning_rate": 3.9999865669652263e-07, "loss": 0.5007, "step": 425, "success_rate.epoch.env.abd": 0.9782608695652174, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.8623853211009175, "success_rate.epoch.env.math": 0.9291666666666667, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6746987951807228, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8620361664886649, "success_rate.epoch.global": 0.8226950354609929, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999501329787234, "tokens_p.mean_in_band": 0.6555989583333334, "tokens_rate.above_band": 0.9936575052854123, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006342494714587738 }, { "epoch": 0.0916063059224542, "grad_norm": 73.24465644591767, "learning_rate": 3.9999836226786514e-07, "loss": 0.3772, "step": 430, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.8623853211009175, "success_rate.epoch.env.math": 0.9300411522633745, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6776119402985075, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626288051525282, "success_rate.epoch.global": 0.8247663551401869, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974628712871287, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.09267149552620367, "grad_norm": 78.03466765787174, "learning_rate": 3.9999803868868867e-07, "loss": 0.3244, "step": 435, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.8660714285714286, "success_rate.epoch.env.math": 0.9317269076305221, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6776119402985075, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633968765993082, "success_rate.epoch.global": 0.8267898383371824, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9952042079207921, "tokens_p.mean_in_band": 0.7994791666666666, "tokens_rate.above_band": 0.9711538461538461, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028846153846153848 }, { "epoch": 0.09373668512995313, "grad_norm": 51.250560278260515, "learning_rate": 3.99997685959229e-07, "loss": 0.3209, "step": 440, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8660714285714286, "success_rate.epoch.env.math": 0.932806324110672, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6794117647058824, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637547241756679, "success_rate.epoch.global": 0.8276255707762558, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950850938967136, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.9594594594594594, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04054054054054054 }, { "epoch": 0.09480187473370259, "grad_norm": 181.4886482421037, "learning_rate": 3.999973040797433e-07, "loss": 0.2428, "step": 445, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8660714285714286, "success_rate.epoch.env.math": 0.93359375, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6820809248554913, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640689592701167, "success_rate.epoch.global": 0.8284424379232506, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949919871794872, "tokens_p.mean_in_band": 0.5953480113636364, "tokens_rate.above_band": 0.9341317365269461, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0658682634730539 }, { "epoch": 0.09586706433745207, "grad_norm": 110.38045176779245, "learning_rate": 3.999968930505097e-07, "loss": 0.505, "step": 450, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.8660714285714286, "success_rate.epoch.env.math": 0.9341085271317829, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6818181818181818, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624224970638761, "success_rate.epoch.global": 0.8270089285714286, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9923349056603774, "tokens_p.mean_in_band": 0.6490234375, "tokens_rate.above_band": 0.8793363499245852, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12066365007541478 }, { "epoch": 0.09693225394120153, "grad_norm": 51.92290806746458, "learning_rate": 3.999964528718279e-07, "loss": 0.3702, "step": 455, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.8660714285714286, "success_rate.epoch.env.math": 0.9348659003831418, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6853932584269663, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630568563825712, "success_rate.epoch.global": 0.8289183222958058, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983591885441527, "tokens_p.mean_in_band": 0.7330729166666666, "tokens_rate.above_band": 0.9858823529411764, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01411764705882353 }, { "epoch": 0.097997443544951, "grad_norm": 269.1855873337498, "learning_rate": 3.9999598354401874e-07, "loss": 0.583, "step": 460, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8596491228070176, "success_rate.epoch.env.math": 0.935361216730038, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6861111111111111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627150508033847, "success_rate.epoch.global": 0.8286026200873362, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974735086342229, "tokens_p.mean_in_band": 0.6817908653846154, "tokens_rate.above_band": 0.98989898989899, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010101010101010102 }, { "epoch": 0.09906263314870047, "grad_norm": 202.27025054914114, "learning_rate": 3.9999548506742416e-07, "loss": 0.3018, "step": 465, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8596491228070176, "success_rate.epoch.env.math": 0.9360902255639098, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6866485013623979, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628301779929447, "success_rate.epoch.global": 0.82829373650108, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.990530303030303, "tokens_p.mean_in_band": 0.6188616071428571, "tokens_rate.above_band": 0.8761061946902655, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12389380530973451 }, { "epoch": 0.10012782275244994, "grad_norm": 114.40305866456586, "learning_rate": 3.9999495744240743e-07, "loss": 0.2159, "step": 470, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8608695652173913, "success_rate.epoch.env.math": 0.9363295880149812, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6916890080428955, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860823712808446, "success_rate.epoch.global": 0.8290598290598291, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988591269841269, "tokens_p.mean_in_band": 0.4375, "tokens_rate.above_band": 0.9984152139461173, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001584786053882726 }, { "epoch": 0.1011930123561994, "grad_norm": 83.56720179963696, "learning_rate": 3.9999440066935317e-07, "loss": 0.4819, "step": 475, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9361702127659575, "success_rate.epoch.env.logic": 0.8620689655172413, "success_rate.epoch.env.math": 0.937037037037037, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6904761904761905, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610129524289466, "success_rate.epoch.global": 0.828752642706131, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9935416666666667, "tokens_p.mean_in_band": 0.58203125, "tokens_rate.above_band": 0.9463722397476341, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05362776025236593 }, { "epoch": 0.10225820195994888, "grad_norm": 74.71988528002478, "learning_rate": 3.9999381474866716e-07, "loss": 0.3764, "step": 480, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.85, "success_rate.epoch.env.math": 0.9375, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6894736842105263, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8599876141029555, "success_rate.epoch.global": 0.8274058577405857, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.999594395280236, "tokens_p.mean_in_band": 0.47119140625, "tokens_rate.above_band": 0.9814707585408222, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018529241459177764 }, { "epoch": 0.10332339156369834, "grad_norm": 94.28729333594796, "learning_rate": 3.9999319968077624e-07, "loss": 0.3541, "step": 485, "success_rate.epoch.env.abd": 0.9791666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.85, "success_rate.epoch.env.math": 0.9347826086956522, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6901041666666666, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8601314468315695, "success_rate.epoch.global": 0.8271221532091098, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982839595375722, "tokens_p.mean_in_band": 0.6754807692307693, "tokens_rate.above_band": 0.9637883008356546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036211699164345405 }, { "epoch": 0.1043885811674478, "grad_norm": 51.53614996122157, "learning_rate": 3.999925554661289e-07, "loss": 0.2195, "step": 490, "success_rate.epoch.env.abd": 0.98, "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8524590163934426, "success_rate.epoch.env.math": 0.9357142857142857, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6909090909090909, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605886241941, "success_rate.epoch.global": 0.8288934426229508, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999047256097561, "tokens_p.mean_in_band": 0.7209821428571429, "tokens_rate.above_band": 0.9590643274853801, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04093567251461988 }, { "epoch": 0.10545377077119727, "grad_norm": 546.5049464676177, "learning_rate": 3.999918821051945e-07, "loss": 0.5175, "step": 495, "success_rate.epoch.env.abd": 0.98, "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.856, "success_rate.epoch.env.math": 0.9361702127659575, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6889460154241646, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8608894737738619, "success_rate.epoch.global": 0.8286004056795132, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961295871559633, "tokens_p.mean_in_band": 0.3876201923076923, "tokens_rate.above_band": 0.8934426229508197, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10655737704918032 }, { "epoch": 0.10651896037494674, "grad_norm": 259.1329333652082, "learning_rate": 3.999911795984638e-07, "loss": 0.3655, "step": 500, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.8582677165354331, "success_rate.epoch.env.math": 0.9366197183098591, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6895674300254453, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612629162693026, "success_rate.epoch.global": 0.8293172690763052, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987847222222223, "tokens_p.mean_in_band": 0.772265625, "tokens_rate.above_band": 0.9473684210526315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05263157894736842 }, { "epoch": 0.10758414997869621, "grad_norm": 125.78633647258418, "learning_rate": 3.999904479464488e-07, "loss": 0.367, "step": 505, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.8515625, "success_rate.epoch.env.math": 0.9372822299651568, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6917293233082706, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609101152150017, "success_rate.epoch.global": 0.8290258449304175, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924395161290323, "tokens_p.mean_in_band": 0.6408420138888888, "tokens_rate.above_band": 0.9117647058823529, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08823529411764706 }, { "epoch": 0.10864933958244567, "grad_norm": 83.79745537983887, "learning_rate": 3.999896871496827e-07, "loss": 0.3141, "step": 510, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.90625, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8527131782945736, "success_rate.epoch.env.math": 0.9377162629757786, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6940298507462687, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617565145491594, "success_rate.epoch.global": 0.8307086614173228, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994205809641533, "tokens_p.mean_in_band": 0.5953125, "tokens_rate.above_band": 0.9938574938574939, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006142506142506142 }, { "epoch": 0.10971452918619515, "grad_norm": 92.02197236937367, "learning_rate": 3.9998889720872003e-07, "loss": 0.4128, "step": 515, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.90625, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8549618320610687, "success_rate.epoch.env.math": 0.9379310344827586, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6921182266009852, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8621037668204685, "success_rate.epoch.global": 0.8304093567251462, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975524664647338, "tokens_p.mean_in_band": 0.451171875, "tokens_rate.above_band": 0.9948342660352992, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005165733964700818 }, { "epoch": 0.11077971878994461, "grad_norm": 66.75270647588353, "learning_rate": 3.9998807812413637e-07, "loss": 0.2742, "step": 520, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9454545454545454, "success_rate.epoch.env.logic": 0.8571428571428571, "success_rate.epoch.env.math": 0.9383561643835616, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6919315403422983, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627067962320488, "success_rate.epoch.global": 0.831081081081081, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973300970873786, "tokens_p.mean_in_band": 0.7044270833333334, "tokens_rate.above_band": 0.9942084942084942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005791505791505791 }, { "epoch": 0.11184490839369408, "grad_norm": 274.2701795228874, "learning_rate": 3.9998722989652877e-07, "loss": 0.5266, "step": 525, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8582089552238806, "success_rate.epoch.env.math": 0.9387755102040817, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6934306569343066, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635389227468108, "success_rate.epoch.global": 0.8326959847036329, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997073126801153, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.11291009799744355, "grad_norm": 84.57627879830797, "learning_rate": 3.999863525265154e-07, "loss": 0.3129, "step": 530, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8592592592592593, "success_rate.epoch.env.math": 0.9389830508474576, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6968973747016707, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639684284237308, "success_rate.epoch.global": 0.8333333333333334, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933035714285714, "tokens_p.mean_in_band": 0.5524553571428571, "tokens_rate.above_band": 0.9565217391304348, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043478260869565216 }, { "epoch": 0.113975287601193, "grad_norm": 85.09942168277358, "learning_rate": 3.9998544601473564e-07, "loss": 0.4208, "step": 535, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8602941176470589, "success_rate.epoch.env.math": 0.9389830508474576, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6901408450704225, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635106497967339, "success_rate.epoch.global": 0.8302063789868668, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.2857142857142857, "success_rate.window.env_macro_mean": 0.7619047619047619, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9979304635761589, "tokens_p.mean_in_band": 0.4636314655172414, "tokens_rate.above_band": 0.8388888888888889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16111111111111112 }, { "epoch": 0.11504047720494248, "grad_norm": 73.6428983207297, "learning_rate": 3.9998451036185016e-07, "loss": 0.494, "step": 540, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9491525423728814, "success_rate.epoch.env.logic": 0.8613138686131386, "success_rate.epoch.env.math": 0.9391891891891891, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6906976744186046, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8641367990677572, "success_rate.epoch.global": 0.8308550185873605, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984691722972973, "tokens_p.mean_in_band": 0.6423611111111112, "tokens_rate.above_band": 0.9924559932942163, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0075440067057837385 }, { "epoch": 0.11610566680869194, "grad_norm": 87.36311953159915, "learning_rate": 3.9998354556854086e-07, "loss": 0.3509, "step": 545, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8633093525179856, "success_rate.epoch.env.math": 0.9403973509933775, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6906976744186046, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8645050811892255, "success_rate.epoch.global": 0.8324125230202578, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983552631578947, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9988814317673378, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0011185682326621924 }, { "epoch": 0.11717085641244142, "grad_norm": 99.39985706178769, "learning_rate": 3.999825516355109e-07, "loss": 0.4542, "step": 550, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8671328671328671, "success_rate.epoch.env.math": 0.9407894736842105, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6921296296296297, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628539965262454, "success_rate.epoch.global": 0.833029197080292, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972222222222222, "tokens_p.mean_in_band": 0.5642361111111112, "tokens_rate.above_band": 0.967741935483871, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03225806451612903 }, { "epoch": 0.11823604601619088, "grad_norm": 608.1477838662232, "learning_rate": 3.9998152856348464e-07, "loss": 0.6797, "step": 555, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8698630136986302, "success_rate.epoch.env.math": 0.9413680781758957, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6942528735632184, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633773301913101, "success_rate.epoch.global": 0.8345388788426763, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948529411764706, "tokens_p.mean_in_band": 0.865234375, "tokens_rate.above_band": 0.9770114942528736, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022988505747126436 }, { "epoch": 0.11930123561994035, "grad_norm": 132.18093575713627, "learning_rate": 3.999804763532076e-07, "loss": 0.4695, "step": 560, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8648648648648649, "success_rate.epoch.env.math": 0.9419354838709677, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6933638443935927, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630242086794578, "success_rate.epoch.global": 0.8342293906810035, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9990693573667712, "tokens_p.mean_in_band": 0.6654829545454546, "tokens_rate.above_band": 0.9830508474576272, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01694915254237288 }, { "epoch": 0.12036642522368982, "grad_norm": 410.0190897977076, "learning_rate": 3.999793950054468e-07, "loss": 0.5749, "step": 565, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.918918918918919, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8657718120805369, "success_rate.epoch.env.math": 0.9421221864951769, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6968325791855203, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636437211234635, "success_rate.epoch.global": 0.8357015985790408, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980134474327629, "tokens_p.mean_in_band": 0.7724609375, "tokens_rate.above_band": 0.9951338199513382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004866180048661801 }, { "epoch": 0.12143161482743929, "grad_norm": 109.44767367759921, "learning_rate": 3.9997828452099015e-07, "loss": 0.4051, "step": 570, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.918918918918919, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8618421052631579, "success_rate.epoch.env.math": 0.9430379746835443, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6952595936794582, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632267298384568, "success_rate.epoch.global": 0.835387323943662, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953088914549654, "tokens_p.mean_in_band": 0.6181640625, "tokens_rate.above_band": 0.9474835886214442, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0525164113785558 }, { "epoch": 0.12249680443118875, "grad_norm": 223.9658910833248, "learning_rate": 3.9997714490064704e-07, "loss": 0.4545, "step": 575, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8618421052631579, "success_rate.epoch.env.math": 0.943217665615142, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.697986577181208, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639212199507322, "success_rate.epoch.global": 0.8368237347294939, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981589147286821, "tokens_p.mean_in_band": 0.7375, "tokens_rate.above_band": 0.9923076923076923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007692307692307693 }, { "epoch": 0.12356199403493821, "grad_norm": 94.30895427673707, "learning_rate": 3.9997597614524807e-07, "loss": 0.3034, "step": 580, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8653846153846154, "success_rate.epoch.env.math": 0.9435736677115988, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6962305986696231, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8641159957422168, "success_rate.epoch.global": 0.8365051903114187, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942781690140845, "tokens_p.mean_in_band": 0.6307091346153846, "tokens_rate.above_band": 0.8452380952380952, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15476190476190477 }, { "epoch": 0.12462718363868769, "grad_norm": 34.76815656056043, "learning_rate": 3.999747782556449e-07, "loss": 0.119, "step": 585, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8662420382165605, "success_rate.epoch.env.math": 0.9444444444444444, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.698237885462555, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644555854111006, "success_rate.epoch.global": 0.8379073756432247, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0006167763157894, "tokens_p.mean_in_band": 0.8385416666666666, "tokens_rate.above_band": 0.9960681520314548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003931847968545216 }, { "epoch": 0.12569237324243715, "grad_norm": 113.48498229136952, "learning_rate": 3.999735512327106e-07, "loss": 0.4068, "step": 590, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.8670886075949367, "success_rate.epoch.env.math": 0.9420731707317073, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6980306345733042, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8643702468539232, "success_rate.epoch.global": 0.8375850340136054, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8833333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980517456359103, "tokens_p.mean_in_band": 0.6691080729166666, "tokens_rate.above_band": 0.9852579852579852, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014742014742014743 }, { "epoch": 0.1267575628461866, "grad_norm": 144.78465893134438, "learning_rate": 3.9997229507733947e-07, "loss": 0.4542, "step": 595, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.8695652173913043, "success_rate.epoch.env.math": 0.9427710843373494, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6993464052287581, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623214529475992, "success_rate.epoch.global": 0.8381112984822934, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990511133603239, "tokens_p.mean_below_band": 2.455635694786906e-10, "tokens_p.mean_in_band": 0.857421875, "tokens_rate.above_band": 0.993963782696177, "tokens_rate.below_band": 0.002012072434607646, "tokens_rate.in_band": 0.004024144869215292 }, { "epoch": 0.1278227524499361, "grad_norm": 63.654772111912095, "learning_rate": 3.999710097904469e-07, "loss": 0.4511, "step": 600, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8703703703703703, "success_rate.epoch.env.math": 0.9427710843373494, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7044967880085653, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629326879397826, "success_rate.epoch.global": 0.8394648829431438, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976092896174863, "tokens_p.mean_in_band": 0.783203125, "tokens_rate.above_band": 0.9682539682539683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031746031746031744 }, { "epoch": 0.12888794205368556, "grad_norm": 70.54572223671359, "learning_rate": 3.9996969537296963e-07, "loss": 0.243, "step": 605, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8727272727272727, "success_rate.epoch.env.math": 0.9436201780415431, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7029914529914529, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631121320386644, "success_rate.epoch.global": 0.8399668325041459, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998282967032967, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9680851063829787, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031914893617021274 }, { "epoch": 0.12995313165743502, "grad_norm": 34.321238508916935, "learning_rate": 3.9996835182586565e-07, "loss": 0.2804, "step": 610, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8734939759036144, "success_rate.epoch.env.math": 0.9441176470588235, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.701271186440678, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631623469138264, "success_rate.epoch.global": 0.8396381578947368, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968603971962616, "tokens_p.mean_in_band": 0.435546875, "tokens_rate.above_band": 0.9553571428571429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044642857142857144 }, { "epoch": 0.13101832126118448, "grad_norm": 99.86068617393661, "learning_rate": 3.9996697915011404e-07, "loss": 0.5902, "step": 615, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.874251497005988, "success_rate.epoch.env.math": 0.9446064139941691, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7044025157232704, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635835863415864, "success_rate.epoch.global": 0.8409461663947798, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9907945736434108, "tokens_p.mean_in_band": 0.85, "tokens_rate.above_band": 0.9626865671641791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03731343283582089 }, { "epoch": 0.13208351086493395, "grad_norm": 59.76441666171027, "learning_rate": 3.999655773467152e-07, "loss": 0.254, "step": 620, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.875, "success_rate.epoch.env.math": 0.9455587392550143, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.702928870292887, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636267862231765, "success_rate.epoch.global": 0.8414239482200647, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993489583333334, "tokens_p.mean_in_band": 0.4973958333333333, "tokens_rate.above_band": 0.9896907216494846, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010309278350515464 }, { "epoch": 0.13314870046868343, "grad_norm": 109.9478775600475, "learning_rate": 3.9996414641669086e-07, "loss": 0.5104, "step": 625, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8757396449704142, "success_rate.epoch.env.math": 0.9455587392550143, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.702258726899384, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636331045483501, "success_rate.epoch.global": 0.8402889245585875, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9866504854368932, "tokens_p.mean_in_band": 0.5562160326086957, "tokens_rate.above_band": 0.8174603174603174, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18253968253968253 }, { "epoch": 0.1342138900724329, "grad_norm": 260.60151656044053, "learning_rate": 3.999626863610838e-07, "loss": 0.3138, "step": 630, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9538461538461539, "success_rate.epoch.env.logic": 0.8771929824561403, "success_rate.epoch.env.math": 0.9458689458689459, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7014314928425358, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642832835832109, "success_rate.epoch.global": 0.8407643312101911, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977854330708661, "tokens_p.mean_in_band": 0.3307291666666667, "tokens_rate.above_band": 0.9970559371933267, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002944062806673209 }, { "epoch": 0.13527907967618236, "grad_norm": 217.6153973733924, "learning_rate": 3.9996119718095804e-07, "loss": 0.4998, "step": 635, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9538461538461539, "success_rate.epoch.env.logic": 0.8786127167630058, "success_rate.epoch.env.math": 0.9463276836158192, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.6997971602434077, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862682101455708, "success_rate.epoch.global": 0.839652448657188, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9925271739130435, "tokens_p.mean_in_band": 0.4596354166666667, "tokens_rate.above_band": 0.8518518518518519, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14814814814814814 }, { "epoch": 0.13634426927993182, "grad_norm": 909.8461633131893, "learning_rate": 3.99959678877399e-07, "loss": 0.3338, "step": 640, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8806818181818182, "success_rate.epoch.env.math": 0.9466292134831461, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.6995967741935484, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629429692680336, "success_rate.epoch.global": 0.8401253918495298, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993470149253731, "tokens_p.mean_in_band": 0.5807291666666666, "tokens_rate.above_band": 0.9955423476968797, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004457652303120356 }, { "epoch": 0.1374094588836813, "grad_norm": 54.04995690793635, "learning_rate": 3.999581314515131e-07, "loss": 0.3359, "step": 645, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8820224719101124, "success_rate.epoch.env.math": 0.947075208913649, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.702, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633457201390002, "success_rate.epoch.global": 0.8413685847589425, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949252136752137, "tokens_p.mean_in_band": 0.673828125, "tokens_rate.above_band": 0.9831932773109243, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01680672268907563 }, { "epoch": 0.13847464848743077, "grad_norm": 122.77730185305981, "learning_rate": 3.999565549044282e-07, "loss": 0.4627, "step": 650, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.8820224719101124, "success_rate.epoch.env.math": 0.9475138121546961, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7015810276679841, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8634091798626904, "success_rate.epoch.global": 0.8410493827160493, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9874174917491749, "tokens_p.mean_in_band": 0.6307291666666667, "tokens_rate.above_band": 0.9528301886792453, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04716981132075472 }, { "epoch": 0.13953983809118023, "grad_norm": 241.3829237230056, "learning_rate": 3.9995494923729314e-07, "loss": 0.693, "step": 655, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.8833333333333333, "success_rate.epoch.env.math": 0.9480874316939891, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7033398821218074, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863800253295463, "success_rate.epoch.global": 0.8422664624808576, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967350746268657, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.1406050276949297, "grad_norm": 60.80686084518257, "learning_rate": 3.999533144512783e-07, "loss": 0.3258, "step": 660, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8852459016393442, "success_rate.epoch.env.math": 0.9483695652173914, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.703125, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8641030208818453, "success_rate.epoch.global": 0.8427051671732523, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980867346938775, "tokens_p.mean_in_band": 0.5651041666666666, "tokens_rate.above_band": 0.9879032258064516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012096774193548387 }, { "epoch": 0.14167021729867915, "grad_norm": 91.79210271315232, "learning_rate": 3.9995165054757497e-07, "loss": 0.4259, "step": 665, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9571428571428572, "success_rate.epoch.env.logic": 0.8852459016393442, "success_rate.epoch.env.math": 0.9487870619946092, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7001934235976789, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639309334625108, "success_rate.epoch.global": 0.8416289592760181, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983747044917257, "tokens_p.mean_below_band": 3.655441105365753e-08, "tokens_p.mean_in_band": 0.44345238095238093, "tokens_rate.above_band": 0.950561797752809, "tokens_rate.below_band": 0.0022471910112359553, "tokens_rate.in_band": 0.04719101123595506 }, { "epoch": 0.14273540690242864, "grad_norm": 246.6225688094241, "learning_rate": 3.9994995752739583e-07, "loss": 0.4846, "step": 670, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9571428571428572, "success_rate.epoch.env.logic": 0.8858695652173914, "success_rate.epoch.env.math": 0.9493333333333334, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7005758157389635, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640932450344341, "success_rate.epoch.global": 0.8420658682634731, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915303738317757, "tokens_p.mean_in_band": 0.6967329545454546, "tokens_rate.above_band": 0.9067796610169492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09322033898305085 }, { "epoch": 0.1438005965061781, "grad_norm": 184.56099259093554, "learning_rate": 3.9994823539197464e-07, "loss": 0.316, "step": 675, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8864864864864865, "success_rate.epoch.env.math": 0.9498680738786279, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.6984732824427481, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640616768752118, "success_rate.epoch.global": 0.8417533432392273, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666668, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989472517730497, "tokens_p.mean_in_band": 0.5361328125, "tokens_rate.above_band": 0.986013986013986, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013986013986013986 }, { "epoch": 0.14486578610992756, "grad_norm": 199.511712911517, "learning_rate": 3.999464841425667e-07, "loss": 0.4566, "step": 680, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8823529411764706, "success_rate.epoch.env.math": 0.9502617801047121, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.6996197718631179, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863927772054091, "success_rate.epoch.global": 0.8421828908554573, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982244318181818, "tokens_p.mean_in_band": 0.5126953125, "tokens_rate.above_band": 0.9777777777777777, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022222222222222223 }, { "epoch": 0.14593097571367702, "grad_norm": 92.45131206056814, "learning_rate": 3.999447037804481e-07, "loss": 0.3569, "step": 685, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8829787234042553, "success_rate.epoch.env.math": 0.9509043927648578, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7013232514177694, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642562175162373, "success_rate.epoch.global": 0.8433382137628112, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996149289099526, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.1469961653174265, "grad_norm": 113.64849552894992, "learning_rate": 3.9994289430691644e-07, "loss": 0.3519, "step": 690, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8829787234042553, "success_rate.epoch.env.math": 0.9516539440203562, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.701688555347092, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8643575679875846, "success_rate.epoch.global": 0.84375, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941860465116279, "tokens_p.mean_in_band": 0.646484375, "tokens_rate.above_band": 0.9347826086956522, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06521739130434782 }, { "epoch": 0.14806135492117597, "grad_norm": 185.34855306142984, "learning_rate": 3.9994105572329047e-07, "loss": 0.4525, "step": 695, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8842105263157894, "success_rate.epoch.env.math": 0.9517766497461929, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7050092764378478, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8736209727103436, "success_rate.epoch.global": 0.8448773448773449, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967570754716981, "tokens_p.mean_in_band": 0.7512019230769231, "tokens_rate.above_band": 0.9760589318600368, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02394106813996317 }, { "epoch": 0.14912654452492544, "grad_norm": 185.19581163224188, "learning_rate": 3.999391880309101e-07, "loss": 0.5116, "step": 700, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8802083333333334, "success_rate.epoch.env.math": 0.952020202020202, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7058823529411765, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8733780240307648, "success_rate.epoch.global": 0.8445558739255015, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968861209964412, "tokens_p.mean_in_band": 0.6357421875, "tokens_rate.above_band": 0.939799331103679, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06020066889632107 }, { "epoch": 0.1501917341286749, "grad_norm": 39.156166978674044, "learning_rate": 3.999372912311365e-07, "loss": 0.3961, "step": 705, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8808290155440415, "success_rate.epoch.env.math": 0.9526184538653366, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7074954296160878, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.873690911924248, "success_rate.epoch.global": 0.8456614509246089, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999633072407045, "tokens_p.mean_in_band": 0.7099609375, "tokens_rate.above_band": 0.9845857418111753, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015414258188824663 }, { "epoch": 0.15125692373242436, "grad_norm": 41.85091522492022, "learning_rate": 3.9993536532535207e-07, "loss": 0.3028, "step": 710, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.882051282051282, "success_rate.epoch.env.math": 0.9528535980148883, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7065217391304348, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717146844650586, "success_rate.epoch.global": 0.844632768361582, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9896537162162162, "tokens_p.mean_in_band": 0.5136088709677419, "tokens_rate.above_band": 0.8268156424581006, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17318435754189945 }, { "epoch": 0.15232211333617385, "grad_norm": 116.37390393970315, "learning_rate": 3.9993341031496035e-07, "loss": 0.253, "step": 715, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8838383838383839, "success_rate.epoch.env.math": 0.9530864197530864, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7091561938958707, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8721378097642165, "success_rate.epoch.global": 0.8457223001402524, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9926339285714286, "tokens_p.mean_in_band": 0.8098958333333334, "tokens_rate.above_band": 0.9790209790209791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02097902097902098 }, { "epoch": 0.1533873029399233, "grad_norm": 116.36650017890712, "learning_rate": 3.999314262013862e-07, "loss": 0.5568, "step": 720, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8855721393034826, "success_rate.epoch.env.math": 0.9535452322738386, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7107142857142857, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8724787788373315, "success_rate.epoch.global": 0.8467966573816156, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9876543209876543, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012345679012345678 }, { "epoch": 0.15445249254367277, "grad_norm": 198.86925981719995, "learning_rate": 3.9992941298607557e-07, "loss": 0.3691, "step": 725, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8861386138613861, "success_rate.epoch.env.math": 0.9536585365853658, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7107583774250441, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8725634070116645, "success_rate.epoch.global": 0.8464730290456431, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9926571038251366, "tokens_p.mean_in_band": 0.4909855769230769, "tokens_rate.above_band": 0.9336734693877551, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0663265306122449 }, { "epoch": 0.15551768214742223, "grad_norm": 74.23956330436344, "learning_rate": 3.9992737067049566e-07, "loss": 0.3195, "step": 730, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9302325581395349, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9583333333333334, "success_rate.epoch.env.logic": 0.8872549019607843, "success_rate.epoch.env.math": 0.9537712895377128, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.712280701754386, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714177934953359, "success_rate.epoch.global": 0.8468406593406593, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984515765765766, "tokens_p.mean_in_band": 0.6979166666666666, "tokens_rate.above_band": 0.9833887043189369, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016611295681063124 }, { "epoch": 0.15658287175117172, "grad_norm": 100.75482028353807, "learning_rate": 3.99925299256135e-07, "loss": 0.4015, "step": 735, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.958904109589041, "success_rate.epoch.env.logic": 0.8878048780487805, "success_rate.epoch.env.math": 0.9538834951456311, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7137870855148342, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720987570873039, "success_rate.epoch.global": 0.8478854024556617, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978782013103038, "tokens_p.mean_in_band": 0.666015625, "tokens_rate.above_band": 0.9982164090368609, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0017835909631391202 }, { "epoch": 0.15764806135492118, "grad_norm": 41.415504626263875, "learning_rate": 3.999231987445031e-07, "loss": 0.4562, "step": 740, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.958904109589041, "success_rate.epoch.env.logic": 0.8894230769230769, "success_rate.epoch.env.math": 0.9538834951456311, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7115716753022453, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720444651474591, "success_rate.epoch.global": 0.8468834688346883, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969370860927153, "tokens_p.mean_in_band": 0.5588541666666667, "tokens_rate.above_band": 0.9617834394904459, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03821656050955414 }, { "epoch": 0.15871325095867064, "grad_norm": 109.74259053385872, "learning_rate": 3.9992106913713083e-07, "loss": 0.5705, "step": 745, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9594594594594594, "success_rate.epoch.env.logic": 0.8899521531100478, "success_rate.epoch.env.math": 0.9543269230769231, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7101200686106347, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720513967199203, "success_rate.epoch.global": 0.8465679676985195, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.00049882629108, "tokens_p.mean_in_band": 0.490234375, "tokens_rate.above_band": 0.9906976744186047, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009302325581395349 }, { "epoch": 0.1597784405624201, "grad_norm": 320.24658443520934, "learning_rate": 3.999189104355703e-07, "loss": 0.3239, "step": 750, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.96, "success_rate.epoch.env.logic": 0.892018779342723, "success_rate.epoch.env.math": 0.9544364508393285, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7103918228279387, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8723230738792769, "success_rate.epoch.global": 0.8469251336898396, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981677524429967, "tokens_p.mean_in_band": 0.425537109375, "tokens_rate.above_band": 0.9746031746031746, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025396825396825397 }, { "epoch": 0.16084363016616957, "grad_norm": 78.83909682586267, "learning_rate": 3.999167226413947e-07, "loss": 0.4456, "step": 755, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.8930232558139535, "success_rate.epoch.env.math": 0.9547619047619048, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7108843537414966, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708205221675906, "success_rate.epoch.global": 0.8472775564409031, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998900615655233, "tokens_p.mean_in_band": 0.6979166666666666, "tokens_rate.above_band": 0.9973684210526316, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002631578947368421 }, { "epoch": 0.16190881976991905, "grad_norm": 108.22896960316253, "learning_rate": 3.999145057561985e-07, "loss": 0.4478, "step": 760, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.8944954128440367, "success_rate.epoch.env.math": 0.9549763033175356, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7099494097807757, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708888504971356, "success_rate.epoch.global": 0.8469656992084432, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947289156626506, "tokens_p.mean_in_band": 0.59765625, "tokens_rate.above_band": 0.8924731182795699, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10752688172043011 }, { "epoch": 0.16297400937366852, "grad_norm": 179.91587218705112, "learning_rate": 3.9991225978159735e-07, "loss": 0.3552, "step": 765, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.8944954128440367, "success_rate.epoch.env.math": 0.9557109557109557, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7114093959731543, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710883630958446, "success_rate.epoch.global": 0.8479685452162516, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9927591463414634, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.8723404255319149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1276595744680851 }, { "epoch": 0.16403919897741798, "grad_norm": 80.3672189314749, "learning_rate": 3.9990998471922804e-07, "loss": 0.3091, "step": 770, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.8944954128440367, "success_rate.epoch.env.math": 0.9517241379310345, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7123745819397993, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8709082660922077, "success_rate.epoch.global": 0.84765625, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973363774733638, "tokens_p.mean_in_band": 0.71484375, "tokens_rate.above_band": 0.9850074962518741, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014992503748125937 }, { "epoch": 0.16510438858116744, "grad_norm": 54.40434870976095, "learning_rate": 3.999076805707487e-07, "loss": 0.5035, "step": 775, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.8963963963963963, "success_rate.epoch.env.math": 0.952054794520548, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7133333333333334, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712463508887187, "success_rate.epoch.global": 0.8486416558861578, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965861344537815, "tokens_p.mean_in_band": 0.6171875, "tokens_rate.above_band": 0.9966499162479062, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0033500837520938024 }, { "epoch": 0.16616957818491693, "grad_norm": 77.1810737306313, "learning_rate": 3.999053473378385e-07, "loss": 0.5249, "step": 780, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.8968609865470852, "success_rate.epoch.env.math": 0.9522727272727273, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7128712871287128, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712841776934108, "success_rate.epoch.global": 0.8483290488431876, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942434210526315, "tokens_p.mean_in_band": 0.4144965277777778, "tokens_rate.above_band": 0.926829268292683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07317073170731707 }, { "epoch": 0.1672347677886664, "grad_norm": 110.1358637045039, "learning_rate": 3.99902985022198e-07, "loss": 0.5139, "step": 785, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.8973214285714286, "success_rate.epoch.env.math": 0.952808988764045, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7131147540983607, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713969204648027, "success_rate.epoch.global": 0.8486590038314177, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993452380952381, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.963302752293578, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03669724770642202 }, { "epoch": 0.16829995739241585, "grad_norm": 130.90584190579202, "learning_rate": 3.9990059362554866e-07, "loss": 0.5549, "step": 790, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9625, "success_rate.epoch.env.logic": 0.8986784140969163, "success_rate.epoch.env.math": 0.952808988764045, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7138211382113822, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8702502421040113, "success_rate.epoch.global": 0.8483502538071066, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989742888402626, "tokens_p.mean_in_band": 0.6667798913043478, "tokens_rate.above_band": 0.9520833333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04791666666666667 }, { "epoch": 0.1693651469961653, "grad_norm": 151.1400154810627, "learning_rate": 3.998981731496335e-07, "loss": 0.2527, "step": 795, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9625, "success_rate.epoch.env.logic": 0.8986784140969163, "success_rate.epoch.env.math": 0.953125, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.714516129032258, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703762764160302, "success_rate.epoch.global": 0.8486759142496847, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933562992125984, "tokens_p.mean_in_band": 0.608154296875, "tokens_rate.above_band": 0.9407407407407408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05925925925925926 }, { "epoch": 0.17043033659991477, "grad_norm": 67.54128195155032, "learning_rate": 3.9989572359621646e-07, "loss": 0.3927, "step": 800, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8995633187772926, "success_rate.epoch.env.math": 0.9534368070953437, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7154340836012861, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8706269863694747, "success_rate.epoch.global": 0.849624060150376, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999609375, "tokens_p.mean_in_band": 0.6276041666666666, "tokens_rate.above_band": 0.9946714031971581, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0053285968028419185 }, { "epoch": 0.17149552620366426, "grad_norm": 84.23383582672564, "learning_rate": 3.9989324496708275e-07, "loss": 0.4299, "step": 805, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9004329004329005, "success_rate.epoch.env.math": 0.9534368070953437, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7165605095541401, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.871015541620284, "success_rate.epoch.global": 0.8499377334993773, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979570217917676, "tokens_p.mean_in_band": 0.55322265625, "tokens_rate.above_band": 0.9627039627039627, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037296037296037296 }, { "epoch": 0.17256071580741372, "grad_norm": 152.75715314646231, "learning_rate": 3.998907372640388e-07, "loss": 0.3903, "step": 810, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9008620689655172, "success_rate.epoch.env.math": 0.9537444933920705, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7176656151419558, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8711829925672986, "success_rate.epoch.global": 0.8502475247524752, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970238095238095, "tokens_p.mean_in_band": 0.669921875, "tokens_rate.above_band": 0.9797160243407708, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02028397565922921 }, { "epoch": 0.17362590541116318, "grad_norm": 707.8209272033347, "learning_rate": 3.998882004889122e-07, "loss": 0.3435, "step": 815, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9017094017094017, "success_rate.epoch.env.math": 0.9540481400437637, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7183098591549296, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713461946953487, "success_rate.epoch.global": 0.8505535055350554, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9920594262295082, "tokens_p.mean_in_band": 0.6953125, "tokens_rate.above_band": 0.9242424242424242, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07575757575757576 }, { "epoch": 0.17469109501491265, "grad_norm": 75.77018405407335, "learning_rate": 3.998856346435517e-07, "loss": 0.3434, "step": 820, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8991596638655462, "success_rate.epoch.env.math": 0.9543478260869566, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.719626168224299, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712613089925039, "success_rate.epoch.global": 0.8508557457212714, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995291095890411, "tokens_p.mean_in_band": 0.7314453125, "tokens_rate.above_band": 0.8795180722891566, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12048192771084337 }, { "epoch": 0.17575628461866213, "grad_norm": 72.17329635538552, "learning_rate": 3.998830397298273e-07, "loss": 0.4809, "step": 825, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9634146341463414, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9546436285097192, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7204968944099379, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716553685193067, "success_rate.epoch.global": 0.8517618469015796, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0015916149068322, "tokens_p.mean_in_band": 0.681640625, "tokens_rate.above_band": 0.9987593052109182, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012406947890818859 }, { "epoch": 0.1768214742224116, "grad_norm": 74.68546642048564, "learning_rate": 3.9988041574963017e-07, "loss": 0.2879, "step": 830, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9634146341463414, "success_rate.epoch.env.logic": 0.9008264462809917, "success_rate.epoch.env.math": 0.9550321199143469, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7217928902627512, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718991699091488, "success_rate.epoch.global": 0.8526570048309179, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965909090909091, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.990990990990991, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009009009009009009 }, { "epoch": 0.17788666382616106, "grad_norm": 0.0, "learning_rate": 3.998777627048726e-07, "loss": 0.2862, "step": 835, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.963855421686747, "success_rate.epoch.env.logic": 0.9016393442622951, "success_rate.epoch.env.math": 0.9552238805970149, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7211093990755008, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720422455403741, "success_rate.epoch.global": 0.8529411764705882, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979674796747967, "tokens_p.mean_in_band": 0.380859375, "tokens_rate.above_band": 0.9935379644588045, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006462035541195477 }, { "epoch": 0.17895185342991052, "grad_norm": 211.98391817922078, "learning_rate": 3.998750805974882e-07, "loss": 0.5927, "step": 840, "success_rate.epoch.env.abd": 0.9875, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9016393442622951, "success_rate.epoch.env.math": 0.9556025369978859, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7212863705972435, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710640079462759, "success_rate.epoch.global": 0.8526252983293556, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9847134476534296, "tokens_p.mean_in_band": 0.6323162141393442, "tokens_rate.above_band": 0.8195266272189349, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1804733727810651 }, { "epoch": 0.18001704303365998, "grad_norm": 115.45759197346109, "learning_rate": 3.998723694294316e-07, "loss": 0.3578, "step": 845, "success_rate.epoch.env.abd": 0.9875, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9024390243902439, "success_rate.epoch.env.math": 0.9556962025316456, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.723823975720789, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713759125631172, "success_rate.epoch.global": 0.8534994068801898, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989600665557404, "tokens_p.mean_in_band": 0.74921875, "tokens_rate.above_band": 0.9917491749174917, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00825082508250825 }, { "epoch": 0.18108223263740947, "grad_norm": 103.01654660605729, "learning_rate": 3.9986962920267865e-07, "loss": 0.3262, "step": 850, "success_rate.epoch.env.abd": 0.9876543209876543, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9032258064516129, "success_rate.epoch.env.math": 0.9558823529411765, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7239819004524887, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716411699496153, "success_rate.epoch.global": 0.8537735849056604, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996828007518797, "tokens_p.mean_in_band": 0.705078125, "tokens_rate.above_band": 0.981549815498155, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01845018450184502 }, { "epoch": 0.18214742224115893, "grad_norm": 43.53353342667868, "learning_rate": 3.9986685991922645e-07, "loss": 0.4394, "step": 855, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9791666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9036144578313253, "success_rate.epoch.env.math": 0.9560669456066946, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7256371814092953, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8719397953282164, "success_rate.epoch.global": 0.854630715123095, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974061264822134, "tokens_p.mean_in_band": 0.7890625, "tokens_rate.above_band": 0.9921568627450981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00784313725490196 }, { "epoch": 0.1832126118449084, "grad_norm": 56.361183853925176, "learning_rate": 3.998640615810933e-07, "loss": 0.2373, "step": 860, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.904, "success_rate.epoch.env.math": 0.9544513457556936, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.726457399103139, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8719411889718114, "success_rate.epoch.global": 0.8548951048951049, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971289752650176, "tokens_p.mean_below_band": 6.344635039567947e-09, "tokens_p.mean_in_band": 0.8447265625, "tokens_rate.above_band": 0.9826388888888888, "tokens_rate.below_band": 0.003472222222222222, "tokens_rate.in_band": 0.013888888888888888 }, { "epoch": 0.18427780144865785, "grad_norm": 149.68786296746322, "learning_rate": 3.998612341903184e-07, "loss": 0.366, "step": 865, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9047619047619048, "success_rate.epoch.env.math": 0.9545454545454546, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7265973254086181, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720821919084102, "success_rate.epoch.global": 0.8551564310544612, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979628422425033, "tokens_p.mean_in_band": 0.5279947916666666, "tokens_rate.above_band": 0.9922380336351876, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007761966364812419 }, { "epoch": 0.18534299105240734, "grad_norm": 241.04134938089493, "learning_rate": 3.998583777489626e-07, "loss": 0.5057, "step": 870, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9015748031496063, "success_rate.epoch.env.math": 0.9548254620123203, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7263313609467456, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718944065542978, "success_rate.epoch.global": 0.8548387096774194, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997590782122905, "tokens_p.mean_in_band": 0.4397786458333333, "tokens_rate.above_band": 0.9738846572361263, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026115342763873776 }, { "epoch": 0.1864081806561568, "grad_norm": 81.37827634038625, "learning_rate": 3.9985549225910747e-07, "loss": 0.4182, "step": 875, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9015748031496063, "success_rate.epoch.env.math": 0.955193482688391, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.727540500736377, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.871063487600561, "success_rate.epoch.global": 0.8550973654066437, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973714953271028, "tokens_p.mean_in_band": 0.7319711538461539, "tokens_rate.above_band": 0.9427312775330396, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05726872246696035 }, { "epoch": 0.18747337025990626, "grad_norm": 57.99123672846491, "learning_rate": 3.99852577722856e-07, "loss": 0.5053, "step": 880, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8984375, "success_rate.epoch.env.math": 0.9553752535496958, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.726207906295754, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707211548106322, "success_rate.epoch.global": 0.8541310541310542, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.6666666666666666, "tokens_p.mean_above_band": 0.9952400662251656, "tokens_p.mean_in_band": 0.4934895833333333, "tokens_rate.above_band": 0.9096385542168675, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09036144578313253 }, { "epoch": 0.18853855986365572, "grad_norm": 135.74567562821284, "learning_rate": 3.998496341423323e-07, "loss": 0.5965, "step": 885, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8984375, "success_rate.epoch.env.math": 0.9516129032258065, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7260869565217392, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703681275290045, "success_rate.epoch.global": 0.8526912181303116, "success_rate.window.env.math": 0.3333333333333333, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5238095238095238, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9849901574803149, "tokens_p.mean_in_band": 0.59375, "tokens_rate.above_band": 0.8141025641025641, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1858974358974359 }, { "epoch": 0.18960374946740519, "grad_norm": 76.23163641437583, "learning_rate": 3.9984666151968154e-07, "loss": 0.3778, "step": 890, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8984375, "success_rate.epoch.env.math": 0.9519038076152304, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7241379310344828, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8702304284170491, "success_rate.epoch.global": 0.851830985915493, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9923611111111111, "tokens_p.mean_in_band": 0.45390625, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 0.19066893907115467, "grad_norm": 70.31600878436407, "learning_rate": 3.998436598570703e-07, "loss": 0.3689, "step": 895, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8992248062015504, "success_rate.epoch.env.math": 0.9524752475247524, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7249283667621776, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704258085842098, "success_rate.epoch.global": 0.8526610644257703, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9876179245283019, "tokens_p.mean_in_band": 0.82421875, "tokens_rate.above_band": 0.9636363636363636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03636363636363636 }, { "epoch": 0.19173412867490414, "grad_norm": 198.68709370146894, "learning_rate": 3.99840629156686e-07, "loss": 0.3285, "step": 900, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.952755905511811, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7254623044096729, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705703348963009, "success_rate.epoch.global": 0.852924791086351, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981231231231231, "tokens_p.mean_in_band": 0.63916015625, "tokens_rate.above_band": 0.9765395894428153, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02346041055718475 }, { "epoch": 0.1927993182786536, "grad_norm": 115.64974806030389, "learning_rate": 3.998375694207375e-07, "loss": 0.3953, "step": 905, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9003831417624522, "success_rate.epoch.env.math": 0.9528487229862476, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7257383966244726, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870638703210091, "success_rate.epoch.global": 0.8526315789473684, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9913194444444444, "tokens_p.mean_in_band": 0.3875, "tokens_rate.above_band": 0.9152542372881356, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0847457627118644 }, { "epoch": 0.19386450788240306, "grad_norm": 196.30656246782092, "learning_rate": 3.9983448065145473e-07, "loss": 0.348, "step": 910, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9003831417624522, "success_rate.epoch.env.math": 0.9529411764705882, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7284122562674095, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8709722664118953, "success_rate.epoch.global": 0.8534435261707989, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997235254691689, "tokens_p.mean_in_band": 0.709375, "tokens_rate.above_band": 0.9933422103861518, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006657789613848202 }, { "epoch": 0.19492969748615255, "grad_norm": 63.38646157867469, "learning_rate": 3.998313628510887e-07, "loss": 0.4932, "step": 915, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9007633587786259, "success_rate.epoch.env.math": 0.953307392996109, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7285318559556787, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710963945037352, "success_rate.epoch.global": 0.8536986301369863, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982255520504731, "tokens_p.mean_in_band": 0.478515625, "tokens_rate.above_band": 0.9875389408099688, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012461059190031152 }, { "epoch": 0.195994887089902, "grad_norm": 38.91201492250749, "learning_rate": 3.9982821602191167e-07, "loss": 0.3058, "step": 920, "success_rate.epoch.env.abd": 0.9882352941176471, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.8973384030418251, "success_rate.epoch.env.math": 0.9534883720930233, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7286501377410468, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710064906203017, "success_rate.epoch.global": 0.8534059945504087, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975149105367793, "tokens_p.mean_in_band": 0.4923177083333333, "tokens_rate.above_band": 0.971042471042471, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02895752895752896 }, { "epoch": 0.19706007669365147, "grad_norm": 56.016960219651835, "learning_rate": 3.99825040166217e-07, "loss": 0.2913, "step": 925, "success_rate.epoch.env.abd": 0.9882352941176471, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.8977272727272727, "success_rate.epoch.env.math": 0.953757225433526, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7291381668946648, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8711449293553977, "success_rate.epoch.global": 0.8536585365853658, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978146853146853, "tokens_p.mean_in_band": 0.51640625, "tokens_rate.above_band": 0.9828178694158075, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01718213058419244 }, { "epoch": 0.19812526629740093, "grad_norm": 94.65933006220571, "learning_rate": 3.998218352863192e-07, "loss": 0.2486, "step": 930, "success_rate.epoch.env.abd": 0.9883720930232558, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8981132075471698, "success_rate.epoch.env.math": 0.9540229885057471, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7302452316076294, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713606879551405, "success_rate.epoch.global": 0.8544474393530997, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966364970645792, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9922330097087378, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007766990291262136 }, { "epoch": 0.1991904559011504, "grad_norm": 141.17268123939456, "learning_rate": 3.9981860138455407e-07, "loss": 0.3464, "step": 935, "success_rate.epoch.env.abd": 0.9883720930232558, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.8981132075471698, "success_rate.epoch.env.math": 0.9542857142857143, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7303523035230353, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8694932744477022, "success_rate.epoch.global": 0.8536193029490616, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9878554502369669, "tokens_p.mean_in_band": 0.6954296875, "tokens_rate.above_band": 0.8635743519781719, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1364256480218281 }, { "epoch": 0.20025564550489988, "grad_norm": 38.756388962269526, "learning_rate": 3.9981533846327834e-07, "loss": 0.3103, "step": 940, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.8984962406015038, "success_rate.epoch.env.math": 0.9546313799621928, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7314439946018894, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696827890315908, "success_rate.epoch.global": 0.8544, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972278225806451, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.992, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008 }, { "epoch": 0.20132083510864934, "grad_norm": 39.895267565989755, "learning_rate": 3.998120465248701e-07, "loss": 0.3391, "step": 945, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.8955223880597015, "success_rate.epoch.env.math": 0.9548022598870056, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7295850066934404, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869258974438369, "success_rate.epoch.global": 0.8530503978779841, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9918376865671642, "tokens_p.mean_in_band": 0.6015625, "tokens_rate.above_band": 0.8993288590604027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10067114093959731 }, { "epoch": 0.2023860247123988, "grad_norm": 84.84923260523423, "learning_rate": 3.9980872557172846e-07, "loss": 0.4443, "step": 950, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9468085106382979, "success_rate.epoch.env.logic": 0.895910780669145, "success_rate.epoch.env.math": 0.9552238805970149, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7299465240641712, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8694492370018672, "success_rate.epoch.global": 0.8538258575197889, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990684281842819, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.20345121431614827, "grad_norm": 91.18217686793365, "learning_rate": 3.9980537560627366e-07, "loss": 0.2901, "step": 955, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9468085106382979, "success_rate.epoch.env.logic": 0.895910780669145, "success_rate.epoch.env.math": 0.9555555555555556, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7317397078353254, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696730150988123, "success_rate.epoch.global": 0.8545931758530184, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960069444444445, "tokens_p.mean_in_band": 0.7375, "tokens_rate.above_band": 0.972972972972973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02702702702702703 }, { "epoch": 0.20451640391989775, "grad_norm": 293.4925344821206, "learning_rate": 3.9980199663094723e-07, "loss": 0.4795, "step": 960, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8962962962962963, "success_rate.epoch.env.math": 0.9558823529411765, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7318361955085865, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697974433806638, "success_rate.epoch.global": 0.8548302872062663, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963329081632653, "tokens_p.mean_in_band": 0.6484375, "tokens_rate.above_band": 0.9849246231155779, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01507537688442211 }, { "epoch": 0.20558159352364722, "grad_norm": 71.09037694143568, "learning_rate": 3.997985886482116e-07, "loss": 0.3162, "step": 965, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9479166666666666, "success_rate.epoch.env.logic": 0.8974358974358975, "success_rate.epoch.env.math": 0.9560439560439561, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7315789473684211, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699717049298389, "success_rate.epoch.global": 0.8550649350649351, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993512110726643, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9942660550458715, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005733944954128441 }, { "epoch": 0.20664678312739668, "grad_norm": 107.20650782072696, "learning_rate": 3.997951516605506e-07, "loss": 0.319, "step": 970, "success_rate.epoch.env.abd": 0.9887640449438202, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9479166666666666, "success_rate.epoch.env.logic": 0.8978102189781022, "success_rate.epoch.env.math": 0.9561243144424132, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7323759791122716, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870097104292745, "success_rate.epoch.global": 0.8552971576227391, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9934573002754821, "tokens_p.mean_in_band": 0.67578125, "tokens_rate.above_band": 0.989100817438692, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010899182561307902 }, { "epoch": 0.20771197273114614, "grad_norm": 150.3110438257762, "learning_rate": 3.99791685670469e-07, "loss": 0.6073, "step": 975, "success_rate.epoch.env.abd": 0.9887640449438202, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9484536082474226, "success_rate.epoch.env.logic": 0.8985507246376812, "success_rate.epoch.env.math": 0.956442831215971, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7321196358907672, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8702188880011441, "success_rate.epoch.global": 0.8555269922879177, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951746323529411, "tokens_p.mean_in_band": 0.5915798611111112, "tokens_rate.above_band": 0.9379310344827586, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06206896551724138 }, { "epoch": 0.2087771623348956, "grad_norm": 65.41919357601023, "learning_rate": 3.9978819068049294e-07, "loss": 0.2164, "step": 980, "success_rate.epoch.env.abd": 0.9887640449438202, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.8985507246376812, "success_rate.epoch.env.math": 0.9566787003610109, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7325581395348837, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704599548364474, "success_rate.epoch.global": 0.8557544757033249, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983700539568345, "tokens_p.mean_below_band": 2.9976945370435715e-09, "tokens_p.mean_in_band": 0.78515625, "tokens_rate.above_band": 0.9946332737030411, "tokens_rate.below_band": 0.0017889087656529517, "tokens_rate.in_band": 0.0035778175313059034 }, { "epoch": 0.2098423519386451, "grad_norm": 125.19739449705256, "learning_rate": 3.997846666931694e-07, "loss": 0.4152, "step": 985, "success_rate.epoch.env.abd": 0.9888888888888889, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9494949494949495, "success_rate.epoch.env.logic": 0.8992805755395683, "success_rate.epoch.env.math": 0.9568345323741008, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7339331619537275, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707236736489115, "success_rate.epoch.global": 0.8564885496183207, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996510152284264, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9949494949494949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005050505050505051 }, { "epoch": 0.21090754154239455, "grad_norm": 50.78380193167762, "learning_rate": 3.997811137110666e-07, "loss": 0.4005, "step": 990, "success_rate.epoch.env.abd": 0.9888888888888889, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9405940594059405, "success_rate.epoch.env.logic": 0.899641577060932, "success_rate.epoch.env.math": 0.956989247311828, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7343550446998723, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699997381140231, "success_rate.epoch.global": 0.8562025316455696, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9927991548042705, "tokens_p.mean_in_band": 0.5703828828828829, "tokens_rate.above_band": 0.8350668647845468, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1649331352154532 }, { "epoch": 0.211972731146144, "grad_norm": 336.6614214411064, "learning_rate": 3.997775317367741e-07, "loss": 0.2875, "step": 995, "success_rate.epoch.env.abd": 0.989010989010989, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.900709219858156, "success_rate.epoch.env.math": 0.9571428571428572, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7350318471337579, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8702648153480733, "success_rate.epoch.global": 0.856926952141058, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989367219917012, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.21303792074989347, "grad_norm": 101.31429936007149, "learning_rate": 3.9977392077290223e-07, "loss": 0.3308, "step": 1000, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.901060070671378, "success_rate.epoch.env.math": 0.9555160142348754, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7341772151898734, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700926061461028, "success_rate.epoch.global": 0.856140350877193, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.775, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9930862831858407, "tokens_p.mean_in_band": 0.300101902173913, "tokens_rate.above_band": 0.8308823529411765, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16911764705882354 }, { "epoch": 0.21410311035364296, "grad_norm": 77.68227255786232, "learning_rate": 3.997702808220828e-07, "loss": 0.2753, "step": 1005, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.941747572815534, "success_rate.epoch.env.logic": 0.901060070671378, "success_rate.epoch.env.math": 0.9558303886925795, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7342569269521411, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703074072812303, "success_rate.epoch.global": 0.856359102244389, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997750946969697, "tokens_p.mean_in_band": 0.6380208333333334, "tokens_rate.above_band": 0.9887640449438202, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011235955056179775 }, { "epoch": 0.21516829995739242, "grad_norm": 992.9681403865821, "learning_rate": 3.997666118869684e-07, "loss": 0.6206, "step": 1010, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9423076923076923, "success_rate.epoch.env.logic": 0.9020979020979021, "success_rate.epoch.env.math": 0.9543859649122807, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7349246231155779, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703820639450321, "success_rate.epoch.global": 0.856575682382134, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9935787671232876, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9776785714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022321428571428572 }, { "epoch": 0.21623348956114188, "grad_norm": 136.27811542989434, "learning_rate": 3.9976291397023315e-07, "loss": 0.3294, "step": 1015, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9024390243902439, "success_rate.epoch.env.math": 0.9546247818499127, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7346683354192741, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705379426704731, "success_rate.epoch.global": 0.8567901234567902, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981009224091155, "tokens_p.mean_in_band": 0.5725446428571429, "tokens_rate.above_band": 0.9962162162162163, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0037837837837837837 }, { "epoch": 0.21729867916489135, "grad_norm": 70.46154311145497, "learning_rate": 3.9975918707457187e-07, "loss": 0.3256, "step": 1020, "success_rate.epoch.env.abd": 0.9893617021276596, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.8996539792387543, "success_rate.epoch.env.math": 0.9547826086956521, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7325870646766169, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701202973296819, "success_rate.epoch.global": 0.8555282555282555, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.725, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9952256944444444, "tokens_p.mean_in_band": 0.6244480298913043, "tokens_rate.above_band": 0.9616026711185309, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038397328881469114 }, { "epoch": 0.2183638687686408, "grad_norm": 70.3106937233737, "learning_rate": 3.9975543120270083e-07, "loss": 0.2602, "step": 1025, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.8996539792387543, "success_rate.epoch.env.math": 0.9549393414211439, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7336621454993835, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8702424605688751, "success_rate.epoch.global": 0.8557457212713936, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9926321138211383, "tokens_p.mean_in_band": 0.6272321428571429, "tokens_rate.above_band": 0.9461538461538461, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05384615384615385 }, { "epoch": 0.2194290583723903, "grad_norm": 45.41832513907574, "learning_rate": 3.997516463573571e-07, "loss": 0.4279, "step": 1030, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.8979591836734694, "success_rate.epoch.env.math": 0.9535283993115319, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7339901477832512, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699899391696906, "success_rate.epoch.global": 0.8554744525547445, "success_rate.window.env.logic": 0.8, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0006443298969072, "tokens_p.mean_in_band": 0.58549072265625, "tokens_rate.above_band": 0.9644886363636364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03551136363636364 }, { "epoch": 0.22049424797613976, "grad_norm": 72.21395780256603, "learning_rate": 3.997478325412993e-07, "loss": 0.2947, "step": 1035, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.8993288590604027, "success_rate.epoch.env.math": 0.952054794520548, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7334152334152334, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700506625855755, "success_rate.epoch.global": 0.8552058111380145, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977072010869565, "tokens_p.mean_in_band": 0.47380514705882354, "tokens_rate.above_band": 0.9558441558441558, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04415584415584416 }, { "epoch": 0.22155943757988922, "grad_norm": 106.13585676785776, "learning_rate": 3.997439897573067e-07, "loss": 0.5819, "step": 1040, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.8996655518394648, "success_rate.epoch.env.math": 0.9507640067911715, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7347188264058679, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700824351346955, "success_rate.epoch.global": 0.8554216867469879, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.991504854368932, "tokens_p.mean_in_band": 0.4294704861111111, "tokens_rate.above_band": 0.9196428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08035714285714286 }, { "epoch": 0.22262462718363868, "grad_norm": 69.45850710137455, "learning_rate": 3.9974011800818e-07, "loss": 0.4525, "step": 1045, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.8996655518394648, "success_rate.epoch.env.math": 0.9509306260575296, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7345454545454545, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701083873323242, "success_rate.epoch.global": 0.8551558752997602, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9937150837988827, "tokens_p.mean_in_band": 0.4783380681818182, "tokens_rate.above_band": 0.9421052631578948, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05789473684210526 }, { "epoch": 0.22368981678738817, "grad_norm": 41.56704083564087, "learning_rate": 3.997362172967409e-07, "loss": 0.301, "step": 1050, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.900990099009901, "success_rate.epoch.env.math": 0.9511784511784511, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7355072463768116, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703387658889348, "success_rate.epoch.global": 0.8558472553699285, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974226804123711, "tokens_p.mean_in_band": 0.625, "tokens_rate.above_band": 0.9797979797979798, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020202020202020204 }, { "epoch": 0.22475500639113763, "grad_norm": 42.018910693897, "learning_rate": 3.997322876258321e-07, "loss": 0.173, "step": 1055, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9013157894736842, "success_rate.epoch.env.math": 0.9513422818791947, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7365269461077845, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704759677885255, "success_rate.epoch.global": 0.8560570071258907, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925, "tokens_p.mean_in_band": 0.6135817307692307, "tokens_rate.above_band": 0.8849557522123894, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11504424778761062 }, { "epoch": 0.2258201959948871, "grad_norm": 50.53699077668399, "learning_rate": 3.997283289983177e-07, "loss": 0.292, "step": 1060, "success_rate.epoch.env.abd": 0.9895833333333334, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9016393442622951, "success_rate.epoch.env.math": 0.9515050167224081, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7384066587395958, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707010270054748, "success_rate.epoch.global": 0.8567375886524823, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9954108391608392, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.9794520547945206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02054794520547945 }, { "epoch": 0.22688538559863655, "grad_norm": 153.33174535054349, "learning_rate": 3.997243414170826e-07, "loss": 0.4433, "step": 1065, "success_rate.epoch.env.abd": 0.9896907216494846, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9019607843137255, "success_rate.epoch.env.math": 0.9516666666666667, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7384615384615385, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86930247461503, "success_rate.epoch.global": 0.8564705882352941, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973072562358276, "tokens_p.mean_in_band": 0.45458984375, "tokens_rate.above_band": 0.9910112359550561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008988764044943821 }, { "epoch": 0.227950575202386, "grad_norm": 58.014062366038246, "learning_rate": 3.9972032488503296e-07, "loss": 0.2734, "step": 1070, "success_rate.epoch.env.abd": 0.9896907216494846, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9838709677419355, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9022801302931596, "success_rate.epoch.env.math": 0.9517470881863561, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.74, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869553233644293, "success_rate.epoch.global": 0.8571428571428571, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988473360655737, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.2290157648061355, "grad_norm": 590.3894529474009, "learning_rate": 3.997162794050959e-07, "loss": 0.2279, "step": 1075, "success_rate.epoch.env.abd": 0.9897959183673469, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9025974025974026, "success_rate.epoch.env.math": 0.9518272425249169, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7406542056074766, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696816741309467, "success_rate.epoch.global": 0.8573426573426574, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992505081300813, "tokens_p.mean_below_band": 5.617039278149605e-09, "tokens_p.mean_in_band": 0.7265625, "tokens_rate.above_band": 0.9879518072289156, "tokens_rate.below_band": 0.004016064257028112, "tokens_rate.in_band": 0.008032128514056224 }, { "epoch": 0.23008095440988496, "grad_norm": 61.649239684523124, "learning_rate": 3.9971220498021985e-07, "loss": 0.2757, "step": 1080, "success_rate.epoch.env.abd": 0.9897959183673469, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9032258064516129, "success_rate.epoch.env.math": 0.9520661157024793, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7418604651162791, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8698701774528171, "success_rate.epoch.global": 0.8580046403712297, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982348326359832, "tokens_p.mean_in_band": 0.81015625, "tokens_rate.above_band": 0.989648033126294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010351966873706004 }, { "epoch": 0.23114614401363442, "grad_norm": 22.105312534211336, "learning_rate": 3.9970810161337427e-07, "loss": 0.3216, "step": 1085, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.9038461538461539, "success_rate.epoch.env.math": 0.9521452145214522, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7410404624277457, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699157888942909, "success_rate.epoch.global": 0.8577367205542725, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998477564102564, "tokens_p.mean_in_band": 0.4984375, "tokens_rate.above_band": 0.975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025 }, { "epoch": 0.23221133361738389, "grad_norm": 44.231131899712054, "learning_rate": 3.997039693075495e-07, "loss": 0.2264, "step": 1090, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.984375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9454545454545454, "success_rate.epoch.env.logic": 0.9047619047619048, "success_rate.epoch.env.math": 0.9522240527182867, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7416378316032295, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701748866371258, "success_rate.epoch.global": 0.8583908045977011, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999488543371522, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9967373572593801, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0032626427406199023 }, { "epoch": 0.23327652322113338, "grad_norm": 99.58092276063034, "learning_rate": 3.9969980806575724e-07, "loss": 0.2318, "step": 1095, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.984375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9454545454545454, "success_rate.epoch.env.logic": 0.9047619047619048, "success_rate.epoch.env.math": 0.9525368248772504, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7434135166093929, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703647463794101, "success_rate.epoch.global": 0.8590389016018307, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9928463855421686, "tokens_p.mean_in_band": 0.81015625, "tokens_rate.above_band": 0.9431818181818182, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056818181818181816 }, { "epoch": 0.23434171282488284, "grad_norm": 202.14236062783922, "learning_rate": 3.9969561789103016e-07, "loss": 0.3987, "step": 1100, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9056603773584906, "success_rate.epoch.env.math": 0.9526916802610114, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7431506849315068, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705031355984325, "success_rate.epoch.global": 0.8592255125284738, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993421052631579, "tokens_p.mean_in_band": 0.6255580357142857, "tokens_rate.above_band": 0.9809782608695652, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019021739130434784 }, { "epoch": 0.2354069024286323, "grad_norm": 107.93231017247012, "learning_rate": 3.99691398786422e-07, "loss": 0.3863, "step": 1105, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9065420560747663, "success_rate.epoch.env.math": 0.9529983792544571, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7440273037542662, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8706908625559305, "success_rate.epoch.global": 0.8598639455782313, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9914383561643836, "tokens_p.mean_in_band": 0.7700892857142857, "tokens_rate.above_band": 0.9125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0875 }, { "epoch": 0.23647209203238176, "grad_norm": 303.380145269383, "learning_rate": 3.996871507550077e-07, "loss": 0.5499, "step": 1110, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9122807017543859, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.906832298136646, "success_rate.epoch.env.math": 0.9532258064516129, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7429218573046432, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708237047679432, "success_rate.epoch.global": 0.8595936794582393, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977777777777778, "tokens_p.mean_in_band": 0.5796342329545454, "tokens_rate.above_band": 0.9684361549497847, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03156384505021521 }, { "epoch": 0.23753728163613122, "grad_norm": 172.28347523897557, "learning_rate": 3.9968287379988305e-07, "loss": 0.325, "step": 1115, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9385964912280702, "success_rate.epoch.env.logic": 0.9074074074074074, "success_rate.epoch.env.math": 0.9533011272141707, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7440811724915445, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704137108328169, "success_rate.epoch.global": 0.8597752808988764, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930718570683359, "tokens_p.mean_in_band": 0.5292215616966581, "tokens_rate.above_band": 0.907885389533507, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09211461046649301 }, { "epoch": 0.2386024712398807, "grad_norm": 96.41067664182437, "learning_rate": 3.996785679241652e-07, "loss": 0.3093, "step": 1120, "success_rate.epoch.env.abd": 0.99, "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9049079754601227, "success_rate.epoch.env.math": 0.9535256410256411, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7446569178852643, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703381545734128, "success_rate.epoch.global": 0.8599552572706936, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987399193548387, "tokens_p.mean_in_band": 0.74921875, "tokens_rate.above_band": 0.986737400530504, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013262599469496022 }, { "epoch": 0.23966766084363017, "grad_norm": 94.36686829498194, "learning_rate": 3.996742331309921e-07, "loss": 0.2574, "step": 1125, "success_rate.epoch.env.abd": 0.9901960784313726, "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9054878048780488, "success_rate.epoch.env.math": 0.9535256410256411, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7449664429530202, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704368302931449, "success_rate.epoch.global": 0.8601336302895323, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925595238095238, "tokens_p.mean_in_band": 0.7265625, "tokens_rate.above_band": 0.9130434782608695, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08695652173913043 }, { "epoch": 0.24073285044737963, "grad_norm": 134.93265771483846, "learning_rate": 3.9966986942352307e-07, "loss": 0.3389, "step": 1130, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9152542372881356, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9057750759878419, "success_rate.epoch.env.math": 0.9538950715421304, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7452513966480447, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697981178696821, "success_rate.epoch.global": 0.8603104212860311, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975806451612903, "tokens_p.mean_in_band": 0.65390625, "tokens_rate.above_band": 0.9393939393939394, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06060606060606061 }, { "epoch": 0.2417980400511291, "grad_norm": 326.84204609725606, "learning_rate": 3.9966547680493825e-07, "loss": 0.4822, "step": 1135, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9152542372881356, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.906060606060606, "success_rate.epoch.env.math": 0.9542586750788643, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.744988864142539, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652878088791357, "success_rate.epoch.global": 0.8600441501103753, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9962797619047619, "tokens_p.mean_in_band": 0.6243265086206896, "tokens_rate.above_band": 0.9559939301972686, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04400606980273141 }, { "epoch": 0.24286322965487858, "grad_norm": 156.44290078317317, "learning_rate": 3.99661055278439e-07, "loss": 0.4755, "step": 1140, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9063444108761329, "success_rate.epoch.env.math": 0.95141065830721, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7439024390243902, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650843354522499, "success_rate.epoch.global": 0.8589010989010989, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9955119680851063, "tokens_p.mean_in_band": 0.49148995535714285, "tokens_rate.above_band": 0.9641025641025641, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035897435897435895 }, { "epoch": 0.24392841925862804, "grad_norm": 76.93919193952442, "learning_rate": 3.996566048472477e-07, "loss": 0.355, "step": 1145, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9396551724137931, "success_rate.epoch.env.logic": 0.9066265060240963, "success_rate.epoch.env.math": 0.9516380655226209, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7450331125827815, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8645235700176696, "success_rate.epoch.global": 0.8590809628008753, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963118654822335, "tokens_p.mean_in_band": 0.6280048076923077, "tokens_rate.above_band": 0.9680589680589681, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03194103194103194 }, { "epoch": 0.2449936088623775, "grad_norm": 696.2384141161228, "learning_rate": 3.996521255146077e-07, "loss": 0.4063, "step": 1150, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9193548387096774, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9396551724137931, "success_rate.epoch.env.logic": 0.9039039039039038, "success_rate.epoch.env.math": 0.951937984496124, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7447744774477447, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8645241930868773, "success_rate.epoch.global": 0.8588235294117647, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982822410147991, "tokens_p.mean_in_band": 0.6394675925925926, "tokens_rate.above_band": 0.946, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054 }, { "epoch": 0.24605879846612697, "grad_norm": 71.07902201431295, "learning_rate": 3.996476172837836e-07, "loss": 0.39, "step": 1155, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9401709401709402, "success_rate.epoch.env.logic": 0.9047619047619048, "success_rate.epoch.env.math": 0.9521604938271605, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7442371020856202, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8647368280422686, "success_rate.epoch.global": 0.8590021691973969, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978060787671232, "tokens_p.mean_in_band": 0.2152777777777778, "tokens_rate.above_band": 0.984822934232715, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01517706576728499 }, { "epoch": 0.24712398806987643, "grad_norm": 95.64706189143268, "learning_rate": 3.99643080158061e-07, "loss": 0.3741, "step": 1160, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9401709401709402, "success_rate.epoch.env.logic": 0.9050445103857567, "success_rate.epoch.env.math": 0.9523809523809523, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7456331877729258, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642410288321302, "success_rate.epoch.global": 0.8591792656587472, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973958333333334, "tokens_p.mean_in_band": 0.6473524305555556, "tokens_rate.above_band": 0.8888888888888888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1111111111111111 }, { "epoch": 0.24818917767362592, "grad_norm": 64.57519426962992, "learning_rate": 3.996385141407464e-07, "loss": 0.3125, "step": 1165, "success_rate.epoch.env.abd": 0.9903846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.940677966101695, "success_rate.epoch.env.logic": 0.9029411764705882, "success_rate.epoch.env.math": 0.9526717557251908, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7459105779716467, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8641560505418792, "success_rate.epoch.global": 0.8593548387096774, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955180921052632, "tokens_p.mean_in_band": 0.5962171052631579, "tokens_rate.above_band": 0.975609756097561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024390243902439025 }, { "epoch": 0.24925436727737538, "grad_norm": 162.48890252307692, "learning_rate": 3.9963391923516754e-07, "loss": 0.4745, "step": 1170, "success_rate.epoch.env.abd": 0.9903846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9008746355685131, "success_rate.epoch.env.math": 0.952887537993921, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7456521739130435, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632456851282477, "success_rate.epoch.global": 0.8586723768736617, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9827567911714771, "tokens_p.mean_below_band": 5.066394805908203e-07, "tokens_p.mean_in_band": 0.4876067895683453, "tokens_rate.above_band": 0.6282666666666666, "tokens_rate.below_band": 0.0010666666666666667, "tokens_rate.in_band": 0.37066666666666664 }, { "epoch": 0.25031955688112484, "grad_norm": 34.11008594390763, "learning_rate": 3.9962929544467316e-07, "loss": 0.2818, "step": 1175, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9017341040462428, "success_rate.epoch.env.math": 0.9532428355957768, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.745928338762215, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633895493119144, "success_rate.epoch.global": 0.8592750533049041, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9963662790697675, "tokens_p.mean_in_band": 0.8681640625, "tokens_rate.above_band": 0.9699248120300752, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03007518796992481 }, { "epoch": 0.2513847464848743, "grad_norm": 147.7128524768551, "learning_rate": 3.996246427726331e-07, "loss": 0.3735, "step": 1180, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9020172910662824, "success_rate.epoch.env.math": 0.9533834586466166, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7464940668824164, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8634795073292852, "success_rate.epoch.global": 0.8593882752761257, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9893465909090909, "tokens_p.mean_in_band": 0.6871744791666666, "tokens_rate.above_band": 0.9361702127659575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06382978723404255 }, { "epoch": 0.25244993608862376, "grad_norm": 485.1256599952441, "learning_rate": 3.9961996122243804e-07, "loss": 0.8177, "step": 1185, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9025787965616046, "success_rate.epoch.env.math": 0.9520958083832335, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7446351931330472, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632445056458825, "success_rate.epoch.global": 0.8582910321489001, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.6888888888888888, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9884105960264901, "tokens_p.mean_in_band": 0.5148050462877428, "tokens_rate.above_band": 0.8031914893617021, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19680851063829788 }, { "epoch": 0.2535151256923732, "grad_norm": 30.907029672898215, "learning_rate": 3.9961525079750005e-07, "loss": 0.178, "step": 1190, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.921875, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9036827195467422, "success_rate.epoch.env.math": 0.9522388059701492, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7451820128479657, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635712368951267, "success_rate.epoch.global": 0.858887952822241, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966584158415842, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9980237154150198, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001976284584980237 }, { "epoch": 0.2545803152961227, "grad_norm": 62.61177680183389, "learning_rate": 3.9961051150125193e-07, "loss": 0.2963, "step": 1195, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9042253521126761, "success_rate.epoch.env.math": 0.9524517087667161, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7449306296691569, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637345025787883, "success_rate.epoch.global": 0.8590604026845637, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980182926829269, "tokens_p.mean_in_band": 0.7434895833333334, "tokens_rate.above_band": 0.9715639810426541, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02843601895734597 }, { "epoch": 0.2556455048998722, "grad_norm": 232.10678464266607, "learning_rate": 3.996057433371477e-07, "loss": 0.403, "step": 1200, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9242424242424242, "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.901685393258427, "success_rate.epoch.env.math": 0.9525925925925925, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7457446808510638, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637562515649119, "success_rate.epoch.global": 0.8592314118629908, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982069672131147, "tokens_p.mean_in_band": 0.7373798076923077, "tokens_rate.above_band": 0.9740518962075848, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02594810379241517 }, { "epoch": 0.25671069450362166, "grad_norm": 60.46428905885166, "learning_rate": 3.996009463086623e-07, "loss": 0.1888, "step": 1205, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9242424242424242, "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9022346368715084, "success_rate.epoch.env.math": 0.9514705882352941, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7465535524920467, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637777161918906, "success_rate.epoch.global": 0.8594009983361065, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929347826086956, "tokens_p.mean_in_band": 0.4879557291666667, "tokens_rate.above_band": 0.9745762711864406, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025423728813559324 }, { "epoch": 0.2577758841073711, "grad_norm": 84.49690322959322, "learning_rate": 3.995961204192918e-07, "loss": 0.234, "step": 1210, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9253731343283582, "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9025069637883009, "success_rate.epoch.env.math": 0.9518248175182482, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7463002114164905, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639144366655384, "success_rate.epoch.global": 0.8595691797845899, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977678571428571, "tokens_p.mean_in_band": 0.66484375, "tokens_rate.above_band": 0.9685534591194969, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031446540880503145 }, { "epoch": 0.2588410737111206, "grad_norm": 88.24454046746219, "learning_rate": 3.995912656725533e-07, "loss": 0.4847, "step": 1215, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9033149171270718, "success_rate.epoch.env.math": 0.9520348837209303, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7457805907172996, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627796346474436, "success_rate.epoch.global": 0.8593234323432343, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974489795918368, "tokens_p.mean_in_band": 0.675537109375, "tokens_rate.above_band": 0.9821826280623608, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017817371937639197 }, { "epoch": 0.25990626331487005, "grad_norm": 34.44953619460801, "learning_rate": 3.9958638207198493e-07, "loss": 0.2793, "step": 1220, "success_rate.epoch.env.abd": 0.9907407407407407, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9033149171270718, "success_rate.epoch.env.math": 0.9523121387283237, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7465825446898002, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862930209781235, "success_rate.epoch.global": 0.8599013968775678, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956647398843931, "tokens_p.mean_in_band": 0.746875, "tokens_rate.above_band": 0.9719101123595506, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028089887640449437 }, { "epoch": 0.2609714529186195, "grad_norm": 131.53498135573238, "learning_rate": 3.9958146962114574e-07, "loss": 0.4549, "step": 1225, "success_rate.epoch.env.abd": 0.990909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9033149171270718, "success_rate.epoch.env.math": 0.9512195121951219, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7460650577124869, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627991403864929, "success_rate.epoch.global": 0.8596563011456628, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987610132158591, "tokens_p.mean_in_band": 0.416015625, "tokens_rate.above_band": 0.9659574468085106, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03404255319148936 }, { "epoch": 0.26203664252236897, "grad_norm": 37.297637371452424, "learning_rate": 3.995765283236159e-07, "loss": 0.4461, "step": 1230, "success_rate.epoch.env.abd": 0.9910714285714286, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9338842975206612, "success_rate.epoch.env.logic": 0.9038461538461539, "success_rate.epoch.env.math": 0.9513590844062947, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.74581589958159, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629023179000406, "success_rate.epoch.global": 0.8598207008964955, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968993190661478, "tokens_p.mean_in_band": 0.3782552083333333, "tokens_rate.above_band": 0.9884615384615385, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011538461538461539 }, { "epoch": 0.26310183212611843, "grad_norm": 63.362707045834405, "learning_rate": 3.9957155818299666e-07, "loss": 0.2566, "step": 1235, "success_rate.epoch.env.abd": 0.9910714285714286, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9349593495934959, "success_rate.epoch.env.logic": 0.9038461538461539, "success_rate.epoch.env.math": 0.9514978601997147, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7460978147762747, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630738614789195, "success_rate.epoch.global": 0.8599837662337663, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974290780141843, "tokens_p.mean_in_band": 0.5837053571428571, "tokens_rate.above_band": 0.9901685393258427, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009831460674157303 }, { "epoch": 0.2641670217298679, "grad_norm": 24.107363541833237, "learning_rate": 3.995665592029102e-07, "loss": 0.2622, "step": 1240, "success_rate.epoch.env.abd": 0.9911504424778761, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9365079365079365, "success_rate.epoch.env.logic": 0.9016393442622951, "success_rate.epoch.env.math": 0.9516358463726885, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7455867082035307, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629872861004884, "success_rate.epoch.global": 0.8597413096200485, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957075315195754, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9960343688037012, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0039656311962987445 }, { "epoch": 0.2652322113336174, "grad_norm": 63.68066761821555, "learning_rate": 3.995615313869997e-07, "loss": 0.1878, "step": 1245, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9365079365079365, "success_rate.epoch.env.logic": 0.9019073569482289, "success_rate.epoch.env.math": 0.9518413597733711, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7453416149068323, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631189208083877, "success_rate.epoch.global": 0.8599033816425121, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997631195335277, "tokens_p.mean_in_band": 0.5546875, "tokens_rate.above_band": 0.9884726224783862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011527377521613832 }, { "epoch": 0.26629740093736687, "grad_norm": 122.19408019516926, "learning_rate": 3.9955647473892945e-07, "loss": 0.3302, "step": 1250, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9027027027027027, "success_rate.epoch.env.math": 0.9520451339915373, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7461300309597523, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633268729477851, "success_rate.epoch.global": 0.8604651162790697, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9935747663551402, "tokens_p.mean_in_band": 0.724365234375, "tokens_rate.above_band": 0.963963963963964, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036036036036036036 }, { "epoch": 0.26736259054111633, "grad_norm": 47.3893792152576, "learning_rate": 3.9955138926238467e-07, "loss": 0.3884, "step": 1255, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9005376344086021, "success_rate.epoch.env.math": 0.952247191011236, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7458847736625515, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631943713277078, "success_rate.epoch.global": 0.860223642172524, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955501618122977, "tokens_p.mean_in_band": 0.6724076704545454, "tokens_rate.above_band": 0.9335347432024169, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06646525679758308 }, { "epoch": 0.2684277801448658, "grad_norm": 82.82508494278193, "learning_rate": 3.9954627496107157e-07, "loss": 0.3856, "step": 1260, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9013333333333333, "success_rate.epoch.env.math": 0.9523809523809523, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7448770491803278, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632200164343063, "success_rate.epoch.global": 0.8599840891010342, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974563953488372, "tokens_p.mean_in_band": 0.49360795454545453, "tokens_rate.above_band": 0.8865979381443299, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1134020618556701 }, { "epoch": 0.26949296974861525, "grad_norm": 325.699270538047, "learning_rate": 3.9954113183871753e-07, "loss": 0.3237, "step": 1265, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9018567639257294, "success_rate.epoch.env.math": 0.9525801952580195, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.744138634046891, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632185853739451, "success_rate.epoch.global": 0.8597464342313788, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9934129901960784, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9272727272727272, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07272727272727272 }, { "epoch": 0.2705581593523647, "grad_norm": 294.7283271922979, "learning_rate": 3.9953595989907073e-07, "loss": 0.3592, "step": 1270, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9021164021164021, "success_rate.epoch.env.math": 0.9525801952580195, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7439271255060729, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632616734977248, "success_rate.epoch.global": 0.8595106550907656, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961148648648649, "tokens_p.mean_in_band": 0.6058708639705882, "tokens_rate.above_band": 0.9158415841584159, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08415841584158416 }, { "epoch": 0.2716233489561142, "grad_norm": 115.02366969359464, "learning_rate": 3.9953075914590045e-07, "loss": 0.2804, "step": 1275, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.899736147757256, "success_rate.epoch.env.math": 0.952712100139082, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7449596774193549, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8620917261733244, "success_rate.epoch.global": 0.8592767295597484, "success_rate.window.env.agentgym:sciworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987203663793104, "tokens_p.mean_in_band": 0.7512019230769231, "tokens_rate.above_band": 0.9861849096705633, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01381509032943677 }, { "epoch": 0.27268853855986364, "grad_norm": 55.520837167877254, "learning_rate": 3.99525529582997e-07, "loss": 0.2886, "step": 1280, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9517906336088154, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7449596774193549, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8620759881584615, "success_rate.epoch.global": 0.8594361785434612, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959802904564315, "tokens_p.mean_in_band": 0.69140625, "tokens_rate.above_band": 0.9836734693877551, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0163265306122449 }, { "epoch": 0.2737537281636131, "grad_norm": 63.00918193490182, "learning_rate": 3.995202712141716e-07, "loss": 0.4008, "step": 1285, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9002624671916011, "success_rate.epoch.env.math": 0.9519230769230769, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7444889779559118, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862163207042988, "success_rate.epoch.global": 0.859204368174727, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951785714285715, "tokens_p.mean_in_band": 0.4497327302631579, "tokens_rate.above_band": 0.9020618556701031, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0979381443298969 }, { "epoch": 0.2748189177673626, "grad_norm": 144.61545721448763, "learning_rate": 3.995149840432566e-07, "loss": 0.3814, "step": 1290, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9002624671916011, "success_rate.epoch.env.math": 0.9521857923497268, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7457627118644068, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623471437932043, "success_rate.epoch.global": 0.8597513597513597, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9945469798657718, "tokens_p.mean_in_band": 0.7829241071428571, "tokens_rate.above_band": 0.9551282051282052, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04487179487179487 }, { "epoch": 0.2758841073711121, "grad_norm": 58.87006540030209, "learning_rate": 3.9950966807410513e-07, "loss": 0.1866, "step": 1295, "success_rate.epoch.env.abd": 0.9915254237288136, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9002624671916011, "success_rate.epoch.env.math": 0.952316076294278, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7462834489593657, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624196105589766, "success_rate.epoch.global": 0.8599071207430341, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9864864864864865, "tokens_p.mean_in_band": 0.6261160714285714, "tokens_rate.above_band": 0.940677966101695, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059322033898305086 }, { "epoch": 0.27694929697486154, "grad_norm": 157.23310614908453, "learning_rate": 3.9950432331059153e-07, "loss": 0.3177, "step": 1300, "success_rate.epoch.env.abd": 0.9915966386554622, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9007832898172323, "success_rate.epoch.env.math": 0.9525745257452575, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7460474308300395, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624754713656979, "success_rate.epoch.global": 0.8600616808018504, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9935247747747747, "tokens_p.mean_in_band": 0.56796875, "tokens_rate.above_band": 0.9173553719008265, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08264462809917356 }, { "epoch": 0.278014486578611, "grad_norm": 127.29586473619848, "learning_rate": 3.9949894975661096e-07, "loss": 0.3397, "step": 1305, "success_rate.epoch.env.abd": 0.9916666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9018087855297158, "success_rate.epoch.env.math": 0.9527027027027027, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7458128078817734, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625653875232313, "success_rate.epoch.global": 0.8602150537634409, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_p.mean_in_band": 0.5600961538461539, "tokens_rate.above_band": 0.8898305084745762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11016949152542373 }, { "epoch": 0.27907967618236046, "grad_norm": 71.98095137110536, "learning_rate": 3.9949354741607967e-07, "loss": 0.4332, "step": 1310, "success_rate.epoch.env.abd": 0.9834710743801653, "success_rate.epoch.env.agentgym:alfworld": 0.9285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.8994845360824743, "success_rate.epoch.env.math": 0.9528301886792453, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7450980392156863, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615988019887783, "success_rate.epoch.global": 0.8592195868400918, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.52, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9870084269662921, "tokens_p.mean_in_band": 0.7301111355633803, "tokens_rate.above_band": 0.7899408284023669, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21005917159763313 }, { "epoch": 0.2801448657861099, "grad_norm": 141.9478287586167, "learning_rate": 3.9948811629293484e-07, "loss": 0.3183, "step": 1315, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9295774647887324, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9529569892473119, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7458455522971652, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618722873632115, "success_rate.epoch.global": 0.8597560975609756, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965930451127819, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.99812382739212, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001876172607879925 }, { "epoch": 0.2812100553898594, "grad_norm": 79.47000679132687, "learning_rate": 3.994826563911346e-07, "loss": 0.4147, "step": 1320, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.8976982097186701, "success_rate.epoch.env.math": 0.9530201342281879, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7461089494163424, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617937530547451, "success_rate.epoch.global": 0.8595292331055429, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7999999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988977597712106, "tokens_p.mean_in_band": 0.5001148897058824, "tokens_rate.above_band": 0.9686057248384118, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03139427516158818 }, { "epoch": 0.28227524499360884, "grad_norm": 67.74721412021016, "learning_rate": 3.9947716771465813e-07, "loss": 0.3213, "step": 1325, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.8954081632653061, "success_rate.epoch.env.math": 0.9532085561497327, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.746615087040619, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616487087904232, "success_rate.epoch.global": 0.859304084720121, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9923573369565217, "tokens_p.mean_in_band": 0.5833834134615384, "tokens_rate.above_band": 0.8761904761904762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12380952380952381 }, { "epoch": 0.2833404345973583, "grad_norm": 36.412287187478704, "learning_rate": 3.994716502675055e-07, "loss": 0.3005, "step": 1330, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.8931297709923665, "success_rate.epoch.env.math": 0.9533333333333334, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7456647398843931, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86145129412559, "success_rate.epoch.global": 0.8587038432554635, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9974502487562189, "tokens_p.mean_in_band": 0.47042410714285715, "tokens_rate.above_band": 0.9598853868194842, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04011461318051576 }, { "epoch": 0.2844056242011078, "grad_norm": 164.6989623658681, "learning_rate": 3.9946610405369783e-07, "loss": 0.368, "step": 1335, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.8931297709923665, "success_rate.epoch.env.math": 0.9537648612945839, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7454370797310279, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8614698275626705, "success_rate.epoch.global": 0.8588588588588588, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9913366336633663, "tokens_p.mean_in_band": 0.654296875, "tokens_rate.above_band": 0.9619047619047619, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0380952380952381 }, { "epoch": 0.2854708138048573, "grad_norm": 155.86441965563125, "learning_rate": 3.9946052907727716e-07, "loss": 0.2241, "step": 1340, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8936708860759494, "success_rate.epoch.env.math": 0.9538866930171278, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7464114832535885, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605798565577174, "success_rate.epoch.global": 0.8590127150336574, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990981240981242, "tokens_p.mean_below_band": 3.841705620288849e-09, "tokens_p.mean_in_band": 0.8029513888888888, "tokens_rate.above_band": 0.9857752489331437, "tokens_rate.below_band": 0.001422475106685633, "tokens_rate.in_band": 0.012802275960170697 }, { "epoch": 0.28653600340860674, "grad_norm": 63.62741452945286, "learning_rate": 3.994549253423064e-07, "loss": 0.2889, "step": 1345, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8947368421052632, "success_rate.epoch.env.math": 0.9540078843626807, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7468958930276982, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860830218489722, "success_rate.epoch.global": 0.8595380029806259, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983974358974359, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.2876011930123562, "grad_norm": 117.86006513451086, "learning_rate": 3.9944929285286966e-07, "loss": 0.2065, "step": 1350, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8927680798004988, "success_rate.epoch.env.math": 0.9541284403669725, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7480988593155894, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609880105232104, "success_rate.epoch.global": 0.8596881959910914, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000401246223565, "tokens_p.mean_in_band": 0.49874441964285715, "tokens_rate.above_band": 0.9792899408284024, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020710059171597635 }, { "epoch": 0.28866638261610567, "grad_norm": 0.0, "learning_rate": 3.994436316130717e-07, "loss": 0.328, "step": 1355, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8930348258706468, "success_rate.epoch.env.math": 0.954367666232073, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7492904446546831, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861142333911787, "success_rate.epoch.global": 0.860207100591716, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9925595238095238, "tokens_p.mean_in_band": 0.86640625, "tokens_rate.above_band": 0.9438202247191011, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056179775280898875 }, { "epoch": 0.2897315722198551, "grad_norm": 43.621090843298276, "learning_rate": 3.9943794162703856e-07, "loss": 0.2098, "step": 1360, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8938271604938272, "success_rate.epoch.env.math": 0.9546044098573282, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7490566037735849, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612146282179086, "success_rate.epoch.global": 0.8603537214443626, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925742574257426, "tokens_p.mean_in_band": 0.4270833333333333, "tokens_rate.above_band": 0.8487394957983193, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15126050420168066 }, { "epoch": 0.2907967618236046, "grad_norm": 43.90269283165277, "learning_rate": 3.994322228989169e-07, "loss": 0.3167, "step": 1365, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8938271604938272, "success_rate.epoch.env.math": 0.9547218628719275, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7509363295880149, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613961899296385, "success_rate.epoch.global": 0.8608663729809104, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9920164233576643, "tokens_p.mean_in_band": 0.7171875, "tokens_rate.above_band": 0.9647887323943662, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035211267605633804 }, { "epoch": 0.29186195142735405, "grad_norm": 108.55430406989052, "learning_rate": 3.9942647543287454e-07, "loss": 0.2433, "step": 1370, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8940886699507389, "success_rate.epoch.env.math": 0.9548387096774194, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7516279069767442, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861493456625196, "success_rate.epoch.global": 0.8610095098756401, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933286516853933, "tokens_p.mean_below_band": 3.655441105365753e-08, "tokens_p.mean_in_band": 0.693359375, "tokens_rate.above_band": 0.9468085106382979, "tokens_rate.below_band": 0.010638297872340425, "tokens_rate.in_band": 0.0425531914893617 }, { "epoch": 0.2929271410311035, "grad_norm": 84.60491200856944, "learning_rate": 3.9942069923310024e-07, "loss": 0.1952, "step": 1375, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8946078431372549, "success_rate.epoch.env.math": 0.954954954954955, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7527777777777778, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617105036715224, "success_rate.epoch.global": 0.8615160349854227, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9952651515151515, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9880239520958084, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011976047904191617 }, { "epoch": 0.293992330634853, "grad_norm": 0.0, "learning_rate": 3.994148943038037e-07, "loss": 0.2139, "step": 1380, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8948655256723717, "success_rate.epoch.env.math": 0.9551856594110115, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.753690036900369, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618495653416887, "success_rate.epoch.global": 0.8620188816267248, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955778301886793, "tokens_p.mean_in_band": 0.7643229166666666, "tokens_rate.above_band": 0.9217391304347826, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0782608695652174 }, { "epoch": 0.2950575202386025, "grad_norm": 30.945606159980354, "learning_rate": 3.994090606492153e-07, "loss": 0.3553, "step": 1385, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8951219512195122, "success_rate.epoch.env.math": 0.9554140127388535, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7548209366391184, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8619964452156643, "success_rate.epoch.global": 0.8625180897250362, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9922680412371134, "tokens_p.mean_in_band": 0.759765625, "tokens_rate.above_band": 0.9603960396039604, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039603960396039604 }, { "epoch": 0.29612270984235195, "grad_norm": 547.090465167263, "learning_rate": 3.994031982735868e-07, "loss": 0.2379, "step": 1390, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.8956310679611651, "success_rate.epoch.env.math": 0.9555837563451777, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7557182067703568, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8621931585974602, "success_rate.epoch.global": 0.863013698630137, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976415094339622, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.99375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00625 }, { "epoch": 0.2971878994461014, "grad_norm": 72.9360345656743, "learning_rate": 3.9939730718119053e-07, "loss": 0.3989, "step": 1395, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9315068493150684, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8956310679611651, "success_rate.epoch.env.math": 0.9555837563451777, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7563636363636363, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623724564266751, "success_rate.epoch.global": 0.8631465517241379, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969446163366337, "tokens_p.mean_in_band": 0.7836441532258065, "tokens_rate.above_band": 0.9811778992106861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018822100789313904 }, { "epoch": 0.2982530890498509, "grad_norm": 113.12336818495687, "learning_rate": 3.993913873763199e-07, "loss": 0.3762, "step": 1400, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.8956310679611651, "success_rate.epoch.env.math": 0.95448798988622, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7565610859728507, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611854674795779, "success_rate.epoch.global": 0.8625626342161775, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983119235836627, "tokens_p.mean_in_band": 0.6292146381578947, "tokens_rate.above_band": 0.9755784061696658, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02442159383033419 }, { "epoch": 0.29931827865360033, "grad_norm": 215.39438754700254, "learning_rate": 3.993854388632892e-07, "loss": 0.3486, "step": 1405, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.8958837772397095, "success_rate.epoch.env.math": 0.9547169811320755, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7567567567567568, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861247046689424, "success_rate.epoch.global": 0.8626961483594865, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9893465909090909, "tokens_p.mean_in_band": 0.7084517045454546, "tokens_rate.above_band": 0.8888888888888888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1111111111111111 }, { "epoch": 0.3003834682573498, "grad_norm": 45.96687000680754, "learning_rate": 3.993794616464337e-07, "loss": 0.2201, "step": 1410, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.8961352657004831, "success_rate.epoch.env.math": 0.9549436795994993, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7571942446043165, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616368868847261, "success_rate.epoch.global": 0.8631840796019901, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989940987124464, "tokens_p.mean_in_band": 0.869140625, "tokens_rate.above_band": 0.9957264957264957, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004273504273504274 }, { "epoch": 0.30144865786109926, "grad_norm": 110.98541362905216, "learning_rate": 3.9937345573010957e-07, "loss": 0.3345, "step": 1415, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9420289855072463, "success_rate.epoch.env.logic": 0.8966346153846154, "success_rate.epoch.env.math": 0.9549436795994993, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7573858549686661, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617495314771464, "success_rate.epoch.global": 0.863314447592068, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966707021791767, "tokens_p.mean_in_band": 0.447265625, "tokens_rate.above_band": 0.9809976247030879, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019002375296912115 }, { "epoch": 0.3025138474648487, "grad_norm": 38.80479025769866, "learning_rate": 3.9936742111869385e-07, "loss": 0.386, "step": 1420, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9420289855072463, "success_rate.epoch.env.logic": 0.8968824940047961, "success_rate.epoch.env.math": 0.9551681195516812, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7571428571428571, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861873464959099, "success_rate.epoch.global": 0.8634438955539873, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967485549132948, "tokens_p.mean_in_band": 0.7042410714285714, "tokens_rate.above_band": 0.9866920152091255, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013307984790874524 }, { "epoch": 0.30357903706859823, "grad_norm": 63.77814085868929, "learning_rate": 3.993613578165845e-07, "loss": 0.1822, "step": 1425, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.8973747016706444, "success_rate.epoch.env.math": 0.9553349875930521, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7577916295636687, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8620800196326162, "success_rate.epoch.global": 0.8639240506329114, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985062141491395, "tokens_p.mean_in_band": 0.7063802083333334, "tokens_rate.above_band": 0.9942965779467681, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005703422053231939 }, { "epoch": 0.3046442266723477, "grad_norm": 81.32942040954173, "learning_rate": 3.993552658282004e-07, "loss": 0.2428, "step": 1430, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.8981042654028436, "success_rate.epoch.env.math": 0.9555555555555556, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7582222222222222, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622167247098269, "success_rate.epoch.global": 0.8644008409250176, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965034965034965, "tokens_p.mean_in_band": 0.814453125, "tokens_rate.above_band": 0.9533333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04666666666666667 }, { "epoch": 0.30570941627609716, "grad_norm": 137.3063989115003, "learning_rate": 3.993491451579814e-07, "loss": 0.3279, "step": 1435, "success_rate.epoch.env.abd": 0.9844961240310077, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.8983451536643026, "success_rate.epoch.env.math": 0.9556650246305419, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7586206896551724, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622958109644091, "success_rate.epoch.global": 0.8645251396648045, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957157258064516, "tokens_p.mean_in_band": 0.6741071428571429, "tokens_rate.above_band": 0.9465648854961832, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05343511450381679 }, { "epoch": 0.3067746058798466, "grad_norm": 122.5322727047395, "learning_rate": 3.993429958103882e-07, "loss": 0.3459, "step": 1440, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9428571428571428, "success_rate.epoch.env.logic": 0.8985849056603774, "success_rate.epoch.env.math": 0.9557739557739557, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7594713656387665, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862501659674578, "success_rate.epoch.global": 0.8649965205288797, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987139917695473, "tokens_p.mean_in_band": 0.6979166666666666, "tokens_rate.above_band": 0.9759036144578314, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024096385542168676 }, { "epoch": 0.3078397954835961, "grad_norm": 201.48160381375968, "learning_rate": 3.9933681778990234e-07, "loss": 0.3875, "step": 1445, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9432624113475178, "success_rate.epoch.env.logic": 0.8988235294117647, "success_rate.epoch.env.math": 0.9547677261613692, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7601054481546573, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862537040125229, "success_rate.epoch.global": 0.8651178918169209, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943311737804879, "tokens_p.mean_in_band": 0.5791015625, "tokens_rate.above_band": 0.9879518072289156, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012048192771084338 }, { "epoch": 0.30890498508734554, "grad_norm": 139.77347577911482, "learning_rate": 3.993306111010264e-07, "loss": 0.2607, "step": 1450, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.8992974238875878, "success_rate.epoch.env.math": 0.9549878345498783, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7605263157894737, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627105313259178, "success_rate.epoch.global": 0.8655839668279198, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976095699277742, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.9996718083360683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0003281916639317361 }, { "epoch": 0.309970174691095, "grad_norm": 51.08228919367456, "learning_rate": 3.993243757482837e-07, "loss": 0.1899, "step": 1455, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9002320185614849, "success_rate.epoch.env.math": 0.9550970873786407, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7602799650043744, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627830310275141, "success_rate.epoch.global": 0.8657024793388429, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960585585585585, "tokens_p.mean_in_band": 0.5130208333333334, "tokens_rate.above_band": 0.9487179487179487, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05128205128205128 }, { "epoch": 0.31103536429484446, "grad_norm": 86.8509214605948, "learning_rate": 3.9931811173621857e-07, "loss": 0.2136, "step": 1460, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9210526315789473, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.8986175115207373, "success_rate.epoch.env.math": 0.9552599758162031, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7609075043630017, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628038085124587, "success_rate.epoch.global": 0.8658201784488675, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958125, "tokens_p.mean_in_band": 0.7569444444444444, "tokens_rate.above_band": 0.9823182711198428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01768172888015717 }, { "epoch": 0.3121005538985939, "grad_norm": 173.6463091852164, "learning_rate": 3.9931181906939617e-07, "loss": 0.3204, "step": 1465, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9210526315789473, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.8986175115207373, "success_rate.epoch.env.math": 0.9554753309265944, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7610773240660296, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628863213172884, "success_rate.epoch.global": 0.86593707250342, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981884057971014, "tokens_p.mean_in_band": 0.6276041666666666, "tokens_rate.above_band": 0.9387755102040817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061224489795918366 }, { "epoch": 0.31316574350234344, "grad_norm": 88.2591646850361, "learning_rate": 3.993054977524025e-07, "loss": 0.3412, "step": 1470, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.8990825688073395, "success_rate.epoch.env.math": 0.9543817527010804, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7610389610389611, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630097220020151, "success_rate.epoch.global": 0.8657123381049762, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9990781710914455, "tokens_p.mean_in_band": 0.5185546875, "tokens_rate.above_band": 0.9883381924198251, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011661807580174927 }, { "epoch": 0.3142309331060929, "grad_norm": 66.02744915693027, "learning_rate": 3.992991477898445e-07, "loss": 0.4142, "step": 1475, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.8997722095671982, "success_rate.epoch.env.math": 0.954653937947494, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7612456747404844, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86316238245089, "success_rate.epoch.global": 0.8661684782608695, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992806905370843, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.31529612270984236, "grad_norm": 49.1632575748, "learning_rate": 3.9929276918635006e-07, "loss": 0.3243, "step": 1480, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9004524886877828, "success_rate.epoch.env.math": 0.9548156956004756, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.7601380500431406, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625440610294177, "success_rate.epoch.global": 0.8656059580230197, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969758064516129, "tokens_p.mean_in_band": 0.5425646551724138, "tokens_rate.above_band": 0.8104575163398693, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1895424836601307 }, { "epoch": 0.3163613123135918, "grad_norm": 84.44931247002147, "learning_rate": 3.992863619465678e-07, "loss": 0.3398, "step": 1485, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9009009009009009, "success_rate.epoch.env.math": 0.9549228944246738, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.7592433361994841, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625586312997752, "success_rate.epoch.global": 0.8653846153846154, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972426470588235, "tokens_p.mean_in_band": 0.5478515625, "tokens_rate.above_band": 0.9324894514767933, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06751054852320675 }, { "epoch": 0.3174265019173413, "grad_norm": 110.02614841180721, "learning_rate": 3.9927992607516725e-07, "loss": 0.2173, "step": 1490, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9013452914798207, "success_rate.epoch.env.math": 0.9551886792452831, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.7579399141630901, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625152143710607, "success_rate.epoch.global": 0.8651647612642905, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939903846153846, "tokens_p.mean_in_band": 0.574951171875, "tokens_rate.above_band": 0.9069767441860465, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09302325581395349 }, { "epoch": 0.31849169152109075, "grad_norm": 117.81862692191989, "learning_rate": 3.9927346157683887e-07, "loss": 0.2136, "step": 1495, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9240506329113924, "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.9013452914798207, "success_rate.epoch.env.math": 0.9553466509988249, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.7587681779298546, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627731094830312, "success_rate.epoch.global": 0.8656166219839142, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974279835390947, "tokens_p.mean_in_band": 0.7125, "tokens_rate.above_band": 0.9931880108991825, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006811989100817439 }, { "epoch": 0.3195568811248402, "grad_norm": 120.70531930200049, "learning_rate": 3.992669684562939e-07, "loss": 0.3861, "step": 1500, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9240506329113924, "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9017857142857143, "success_rate.epoch.env.math": 0.9554513481828839, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.75809199318569, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627960255746893, "success_rate.epoch.global": 0.8653974615898463, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972245065789473, "tokens_p.mean_in_band": 0.43828125, "tokens_rate.above_band": 0.9681528662420382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03184713375796178 }, { "epoch": 0.32062207072858967, "grad_norm": 51.607289914165435, "learning_rate": 3.992604467182645e-07, "loss": 0.2148, "step": 1505, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9240506329113924, "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9020044543429844, "success_rate.epoch.env.math": 0.955607476635514, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7589134125636672, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623731469084622, "success_rate.epoch.global": 0.8655126498002663, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988532110091743, "tokens_p.mean_in_band": 0.63037109375, "tokens_rate.above_band": 0.9533527696793003, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04664723032069971 }, { "epoch": 0.32168726033233913, "grad_norm": 65.40469120827466, "learning_rate": 3.9925389636750364e-07, "loss": 0.1952, "step": 1510, "success_rate.epoch.env.abd": 0.9849624060150376, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9565217391304348, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9020044543429844, "success_rate.epoch.env.math": 0.9558139534883721, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7578323454699407, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624337360994416, "success_rate.epoch.global": 0.8652952886529529, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666668, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983078917050692, "tokens_p.mean_in_band": 0.5868055555555556, "tokens_rate.above_band": 0.9897377423033067, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010262257696693273 }, { "epoch": 0.32275244993608865, "grad_norm": 164.19980498880886, "learning_rate": 3.9924731740878523e-07, "loss": 0.5151, "step": 1515, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.956989247311828, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9559164733178654, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7580101180438449, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862329696744209, "success_rate.epoch.global": 0.8650793650793651, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999433192261185, "tokens_p.mean_in_band": 0.568359375, "tokens_rate.above_band": 0.9560693641618497, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04393063583815029 }, { "epoch": 0.3238176395398381, "grad_norm": 61.94851402954748, "learning_rate": 3.992407098469039e-07, "loss": 0.2012, "step": 1520, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9004424778761062, "success_rate.epoch.env.math": 0.9560185185185185, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7590260285474392, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862513150815042, "success_rate.epoch.global": 0.8655240606460118, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971751412429378, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.32488282914358757, "grad_norm": 186.84805360085605, "learning_rate": 3.992340736866753e-07, "loss": 0.5019, "step": 1525, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9006622516556292, "success_rate.epoch.env.math": 0.9560693641618497, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7577276524644946, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624704899051228, "success_rate.epoch.global": 0.8649802890932983, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9961890243902439, "tokens_p.mean_in_band": 0.28076171875, "tokens_rate.above_band": 0.9318181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06818181818181818 }, { "epoch": 0.32594801874733703, "grad_norm": 351.9884742490813, "learning_rate": 3.992274089329356e-07, "loss": 0.3237, "step": 1530, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9010989010989011, "success_rate.epoch.env.math": 0.956221198156682, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7587354409317804, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626156055328855, "success_rate.epoch.global": 0.8654223968565815, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9935963114754098, "tokens_p.mean_in_band": 0.8697916666666666, "tokens_rate.above_band": 0.976, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024 }, { "epoch": 0.3270132083510865, "grad_norm": 74.54036807916403, "learning_rate": 3.992207155905423e-07, "loss": 0.1336, "step": 1535, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9452054794520548, "success_rate.epoch.env.logic": 0.9019607843137255, "success_rate.epoch.env.math": 0.956271576524741, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7595356550580431, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628056390741858, "success_rate.epoch.global": 0.8658616187989556, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969866071428571, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9982174688057041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0017825311942959 }, { "epoch": 0.32807839795483595, "grad_norm": 100.67878856565883, "learning_rate": 3.992139936643733e-07, "loss": 0.3956, "step": 1540, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9455782312925171, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9563719862227325, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7603305785123967, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629622010519395, "success_rate.epoch.global": 0.8659726740403383, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970596280087527, "tokens_p.mean_in_band": 0.6303267045454546, "tokens_rate.above_band": 0.9764957264957265, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023504273504273504 }, { "epoch": 0.3291435875585854, "grad_norm": 77.36439900993793, "learning_rate": 3.992072431593275e-07, "loss": 0.2907, "step": 1545, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9455782312925171, "success_rate.epoch.env.logic": 0.9004329004329005, "success_rate.epoch.env.math": 0.9564220183486238, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7615131578947368, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631526615251883, "success_rate.epoch.global": 0.8664072632944229, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960656474820144, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.996415770609319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0035842293906810036 }, { "epoch": 0.3302087771623349, "grad_norm": 96.53931145528712, "learning_rate": 3.992004640803246e-07, "loss": 0.2384, "step": 1550, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9012875536480687, "success_rate.epoch.env.math": 0.9565714285714285, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7619047619047619, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633129689889544, "success_rate.epoch.global": 0.8668390433096316, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996141975308642, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9918367346938776, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00816326530612245 }, { "epoch": 0.33127396676608434, "grad_norm": 686.4017677934039, "learning_rate": 3.991936564323052e-07, "loss": 0.3249, "step": 1555, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9463087248322147, "success_rate.epoch.env.logic": 0.8993576017130621, "success_rate.epoch.env.math": 0.95662100456621, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.761437908496732, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631424673525807, "success_rate.epoch.global": 0.8663015463917526, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9989543114543115, "tokens_p.mean_in_band": 0.6637834821428571, "tokens_rate.above_band": 0.9823008849557522, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017699115044247787 }, { "epoch": 0.33233915636983385, "grad_norm": 224.07292739092742, "learning_rate": 3.9918682022023065e-07, "loss": 0.5372, "step": 1560, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.9595959595959596, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.8995726495726496, "success_rate.epoch.env.math": 0.9555808656036446, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7610114192495921, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86266825087936, "success_rate.epoch.global": 0.8657675016056519, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9950471698113208, "tokens_p.mean_in_band": 0.5339439655172413, "tokens_rate.above_band": 0.9481216457960644, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0518783542039356 }, { "epoch": 0.3334043459735833, "grad_norm": 47.40386491265625, "learning_rate": 3.9917995544908316e-07, "loss": 0.2637, "step": 1565, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.8997867803837953, "success_rate.epoch.env.math": 0.95578231292517, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7607811228641171, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627218256203816, "success_rate.epoch.global": 0.865877080665813, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981475515463918, "tokens_p.mean_in_band": 0.55625, "tokens_rate.above_band": 0.9872773536895675, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01272264631043257 }, { "epoch": 0.3344695355773328, "grad_norm": 38.540465202751186, "learning_rate": 3.9917306212386564e-07, "loss": 0.3496, "step": 1570, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.925, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9002123142250531, "success_rate.epoch.env.math": 0.9560315670800451, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.760551948051948, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862762335909833, "success_rate.epoch.global": 0.8659859604339503, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968327702702703, "tokens_p.mean_in_band": 0.5712890625, "tokens_rate.above_band": 0.9736842105263158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02631578947368421 }, { "epoch": 0.33553472518108224, "grad_norm": 79.44511342691798, "learning_rate": 3.99166140249602e-07, "loss": 0.318, "step": 1575, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9006342494714588, "success_rate.epoch.env.math": 0.956081081081081, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.761326860841424, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630054371907057, "success_rate.epoch.global": 0.8664122137404581, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976156655844156, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9983792544570502, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016207455429497568 }, { "epoch": 0.3365999147848317, "grad_norm": 81.54056074429866, "learning_rate": 3.9915918983133674e-07, "loss": 0.2979, "step": 1580, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.8987341772151899, "success_rate.epoch.env.math": 0.9562780269058296, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7612903225806451, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628567644381835, "success_rate.epoch.global": 0.8662016487000634, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99734375, "tokens_p.mean_in_band": 0.48270089285714285, "tokens_rate.above_band": 0.9448818897637795, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05511811023622047 }, { "epoch": 0.33766510438858116, "grad_norm": 79.58485444247916, "learning_rate": 3.991522108741354e-07, "loss": 0.2972, "step": 1585, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.8991596638655462, "success_rate.epoch.env.math": 0.9563758389261745, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7614457831325301, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629278129968657, "success_rate.epoch.global": 0.8663084702907712, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980540293040293, "tokens_p.mean_in_band": 0.65, "tokens_rate.above_band": 0.9820143884892086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017985611510791366 }, { "epoch": 0.3387302939923306, "grad_norm": 94.54094001351449, "learning_rate": 3.991452033830841e-07, "loss": 0.4793, "step": 1590, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.899581589958159, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7612179487179487, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629908305209835, "success_rate.epoch.global": 0.8664146187775678, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944196428571429, "tokens_p.mean_below_band": 4.602043190971017e-10, "tokens_p.mean_in_band": 0.7946428571428571, "tokens_rate.above_band": 0.9871794871794872, "tokens_rate.below_band": 0.0016025641025641025, "tokens_rate.in_band": 0.011217948717948718 }, { "epoch": 0.3397954835960801, "grad_norm": 130.93538291608567, "learning_rate": 3.991381673632899e-07, "loss": 0.3931, "step": 1595, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9002079002079002, "success_rate.epoch.env.math": 0.9567147613762487, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7617905675459632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631173715503988, "success_rate.epoch.global": 0.8668341708542714, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957561728395061, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.9759036144578314, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024096385542168676 }, { "epoch": 0.34086067319982954, "grad_norm": 131.1681464740304, "learning_rate": 3.9913110281988054e-07, "loss": 0.4012, "step": 1600, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9146341463414634, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9006211180124224, "success_rate.epoch.env.math": 0.9568106312292359, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7627388535031847, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622233346440598, "success_rate.epoch.global": 0.8669380087664371, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968826034063261, "tokens_p.mean_in_band": 0.70068359375, "tokens_rate.above_band": 0.9903614457831326, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00963855421686747 }, { "epoch": 0.34192586280357906, "grad_norm": 155.88479947828057, "learning_rate": 3.991240097580047e-07, "loss": 0.2929, "step": 1605, "success_rate.epoch.env.abd": 0.9858156028368794, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9006211180124224, "success_rate.epoch.env.math": 0.9569060773480663, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7626984126984127, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624540070317703, "success_rate.epoch.global": 0.8670411985018727, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986590485074627, "tokens_p.mean_in_band": 0.5579427083333334, "tokens_rate.above_band": 0.9944341372912802, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0055658627087198514 }, { "epoch": 0.3429910524073285, "grad_norm": 56.745914811429785, "learning_rate": 3.9911688818283167e-07, "loss": 0.3288, "step": 1610, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9008264462809917, "success_rate.epoch.env.math": 0.9570011025358324, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7626582278481012, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625566495469177, "success_rate.epoch.global": 0.8671437461107654, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972587719298246, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.991304347826087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008695652173913044 }, { "epoch": 0.344056242011078, "grad_norm": 58.56698539334295, "learning_rate": 3.991097380995516e-07, "loss": 0.297, "step": 1615, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9176470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9477124183006536, "success_rate.epoch.env.logic": 0.9012345679012346, "success_rate.epoch.env.math": 0.9570484581497798, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7632202052091555, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862787327924508, "success_rate.epoch.global": 0.8675558312655087, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980961134453782, "tokens_p.mean_in_band": 0.7682291666666666, "tokens_rate.above_band": 0.9968586387434555, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0031413612565445027 }, { "epoch": 0.34512143161482745, "grad_norm": 67.4285831649213, "learning_rate": 3.991025595133755e-07, "loss": 0.2021, "step": 1620, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9176470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.948051948051948, "success_rate.epoch.env.logic": 0.9016393442622951, "success_rate.epoch.env.math": 0.9572368421052632, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7622047244094489, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627798014943381, "success_rate.epoch.global": 0.8673469387755102, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972178060413355, "tokens_p.mean_in_band": 0.5549879807692307, "tokens_rate.above_band": 0.9797507788161994, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020249221183800622 }, { "epoch": 0.3461866212185769, "grad_norm": 90.80392114340461, "learning_rate": 3.99095352429535e-07, "loss": 0.2578, "step": 1625, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9186046511627907, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9016393442622951, "success_rate.epoch.env.math": 0.9573304157549234, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7629513343799058, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630377278234428, "success_rate.epoch.global": 0.8677558569667078, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991514513108615, "tokens_p.mean_in_band": 0.771484375, "tokens_rate.above_band": 0.9925650557620818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007434944237918215 }, { "epoch": 0.34725181082232637, "grad_norm": 87.63793387690875, "learning_rate": 3.990881168532827e-07, "loss": 0.3226, "step": 1630, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9080459770114943, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9020408163265307, "success_rate.epoch.env.math": 0.9574235807860262, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7636932707355243, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862223564156712, "success_rate.epoch.global": 0.8678549477566072, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980300859598854, "tokens_p.mean_in_band": 0.76611328125, "tokens_rate.above_band": 0.9886685552407932, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0113314447592068 }, { "epoch": 0.34831700042607583, "grad_norm": 83.51143383530129, "learning_rate": 3.990808527898916e-07, "loss": 0.337, "step": 1635, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9080459770114943, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9026369168356998, "success_rate.epoch.env.math": 0.9575625680087051, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.76328125, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623119466008909, "success_rate.epoch.global": 0.867953431372549, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979940878378378, "tokens_p.mean_in_band": 0.6865234375, "tokens_rate.above_band": 0.961038961038961, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03896103896103896 }, { "epoch": 0.3493821900298253, "grad_norm": 60.8269867052626, "learning_rate": 3.9907356024465587e-07, "loss": 0.3943, "step": 1640, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.902834008097166, "success_rate.epoch.env.math": 0.9566630552546046, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7632398753894081, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613062647474868, "success_rate.epoch.global": 0.8674404398289554, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9965717299578059, "tokens_p.mean_in_band": 0.614453125, "tokens_rate.above_band": 0.9595141700404858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04048582995951417 }, { "epoch": 0.35044737963357475, "grad_norm": 112.00326917942903, "learning_rate": 3.990662392228902e-07, "loss": 0.3135, "step": 1645, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9032258064516129, "success_rate.epoch.env.math": 0.9567099567099567, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7636080870917574, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8614871257197677, "success_rate.epoch.global": 0.8678440925700366, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982135668276972, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.35151256923732427, "grad_norm": 265.864720672112, "learning_rate": 3.990588897299302e-07, "loss": 0.2984, "step": 1650, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9036144578313253, "success_rate.epoch.env.math": 0.9568500539374326, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7641582622187743, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616219139652707, "success_rate.epoch.global": 0.8682452944748027, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983333333333333, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.35257775884107373, "grad_norm": 175.9035112668465, "learning_rate": 3.99051511771132e-07, "loss": 0.4593, "step": 1655, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9018036072144289, "success_rate.epoch.env.math": 0.9570354457572503, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7633410672853829, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613998545352277, "success_rate.epoch.global": 0.8677360774818402, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975378787878788, "tokens_p.mean_in_band": 0.5579427083333334, "tokens_rate.above_band": 0.9322033898305084, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06779661016949153 }, { "epoch": 0.3536429484448232, "grad_norm": 48.11765350577683, "learning_rate": 3.9904410535187265e-07, "loss": 0.2187, "step": 1660, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9005964214711729, "success_rate.epoch.env.math": 0.9571275455519829, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.764070932922128, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613648345068843, "success_rate.epoch.global": 0.867833433916717, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9928728070175439, "tokens_p.mean_in_band": 0.6769386574074074, "tokens_rate.above_band": 0.8941176470588236, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10588235294117647 }, { "epoch": 0.35470813804857265, "grad_norm": 30.89964757892007, "learning_rate": 3.990366704775499e-07, "loss": 0.5268, "step": 1665, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9011857707509882, "success_rate.epoch.env.math": 0.9572192513368984, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.7624903920061491, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8608501626329612, "success_rate.epoch.global": 0.8670276774969916, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9954594017094017, "tokens_p.mean_in_band": 0.4809027777777778, "tokens_rate.above_band": 0.8125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1875 }, { "epoch": 0.3557733276523221, "grad_norm": 62.64945174130081, "learning_rate": 3.990292071535822e-07, "loss": 0.2773, "step": 1670, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8977272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9506172839506173, "success_rate.epoch.env.logic": 0.9017681728880157, "success_rate.epoch.env.math": 0.9573560767590619, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.7630368098159509, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609931053887249, "success_rate.epoch.global": 0.8674265146970606, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9895833333333334, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.3568385172560716, "grad_norm": 74.75262038442307, "learning_rate": 3.9902171538540884e-07, "loss": 0.375, "step": 1675, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.898876404494382, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.950920245398773, "success_rate.epoch.env.logic": 0.90234375, "success_rate.epoch.env.math": 0.9574468085106383, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.7626339969372129, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611574251944927, "success_rate.epoch.global": 0.8675239234449761, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991347719869706, "tokens_p.mean_in_band": 0.3053385416666667, "tokens_rate.above_band": 0.9903225806451613, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00967741935483871 }, { "epoch": 0.35790370685982104, "grad_norm": 78.76786976636863, "learning_rate": 3.9901419517848974e-07, "loss": 0.2368, "step": 1680, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.950920245398773, "success_rate.epoch.env.logic": 0.9025341130604289, "success_rate.epoch.env.math": 0.9574920297555791, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.7635392829900839, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8614035917464868, "success_rate.epoch.global": 0.8679189028026237, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993265086206896, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.3589688964635705, "grad_norm": 275.52401010424194, "learning_rate": 3.990066465383055e-07, "loss": 0.4104, "step": 1685, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9007782101167315, "success_rate.epoch.env.math": 0.9556025369978859, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.763899466869764, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611402772404188, "success_rate.epoch.global": 0.8674197384066588, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9966457960644007, "tokens_p.mean_in_band": 0.6673519736842105, "tokens_rate.above_band": 0.967128027681661, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0328719723183391 }, { "epoch": 0.36003408606731996, "grad_norm": 162.86737483365874, "learning_rate": 3.989990694703576e-07, "loss": 0.2919, "step": 1690, "success_rate.epoch.env.abd": 0.9867549668874173, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9007782101167315, "success_rate.epoch.env.math": 0.9557428872497366, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7646165527714502, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604733829563572, "success_rate.epoch.global": 0.8672199170124482, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995895127118644, "tokens_p.mean_in_band": 0.5921223958333334, "tokens_rate.above_band": 0.8676470588235294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1323529411764706 }, { "epoch": 0.3610992756710695, "grad_norm": 108.43387266027811, "learning_rate": 3.989914639801681e-07, "loss": 0.2812, "step": 1695, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9007782101167315, "success_rate.epoch.env.math": 0.9559748427672956, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.765329295987888, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605671863299696, "success_rate.epoch.global": 0.8676122931442081, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961309523809524, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9905660377358491, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009433962264150943 }, { "epoch": 0.36216446527481894, "grad_norm": 416.0216363403163, "learning_rate": 3.989838300732799e-07, "loss": 0.5509, "step": 1700, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.8994197292069632, "success_rate.epoch.env.math": 0.9561128526645768, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7658610271903323, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605360409411528, "success_rate.epoch.global": 0.8677077195050088, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999312106918239, "tokens_p.mean_in_band": 0.6130208333333333, "tokens_rate.above_band": 0.9769585253456221, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02304147465437788 }, { "epoch": 0.3632296548785684, "grad_norm": 203.39997145765022, "learning_rate": 3.9897616775525646e-07, "loss": 0.4874, "step": 1705, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9010989010989011, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.8994197292069632, "success_rate.epoch.env.math": 0.9562955254942768, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7652370203160271, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605958197642706, "success_rate.epoch.global": 0.867508813160987, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99619708994709, "tokens_p.mean_in_band": 0.5598958333333334, "tokens_rate.above_band": 0.9767441860465116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023255813953488372 }, { "epoch": 0.36429484448231786, "grad_norm": 149.26613329584015, "learning_rate": 3.9896847703168206e-07, "loss": 0.485, "step": 1710, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9010989010989011, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.8998073217726397, "success_rate.epoch.env.math": 0.9564766839378238, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7655897821187078, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8607104835954087, "success_rate.epoch.global": 0.8678968951376684, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998956013363029, "tokens_p.mean_in_band": 0.865234375, "tokens_rate.above_band": 0.9955654101995566, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004434589800443459 }, { "epoch": 0.3653600340860673, "grad_norm": 116.37779562520686, "learning_rate": 3.9896075790816163e-07, "loss": 0.348, "step": 1715, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.9010989010989011, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9001919385796545, "success_rate.epoch.env.math": 0.9567010309278351, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.765015015015015, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860721410504985, "success_rate.epoch.global": 0.8679906542056075, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997093023255814, "tokens_p.mean_in_band": 0.31640625, "tokens_rate.above_band": 0.9817351598173516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0182648401826484 }, { "epoch": 0.3664252236898168, "grad_norm": 53.77509510047899, "learning_rate": 3.9895301039032085e-07, "loss": 0.2927, "step": 1720, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.9010989010989011, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9003831417624522, "success_rate.epoch.env.math": 0.9570112589559877, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7653673163418291, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8607990225538728, "success_rate.epoch.global": 0.8683750728013978, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9902146464646465, "tokens_p.mean_in_band": 0.7958984375, "tokens_rate.above_band": 0.9611650485436893, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038834951456310676 }, { "epoch": 0.36749041329356624, "grad_norm": 82.51702436627924, "learning_rate": 3.98945234483806e-07, "loss": 0.2838, "step": 1725, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.900952380952381, "success_rate.epoch.env.math": 0.9570990806945863, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7653213751868461, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86095230725552, "success_rate.epoch.global": 0.8684668989547039, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980902777777778, "tokens_p.mean_in_band": 0.6938100961538461, "tokens_rate.above_band": 0.9651474530831099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03485254691689008 }, { "epoch": 0.3685556028973157, "grad_norm": 51.601020862501755, "learning_rate": 3.989374301942841e-07, "loss": 0.3697, "step": 1730, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9011406844106464, "success_rate.epoch.env.math": 0.9573604060913705, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7651006711409396, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609731186018057, "success_rate.epoch.global": 0.8685581933989577, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964080459770115, "tokens_p.mean_in_band": 0.443359375, "tokens_rate.above_band": 0.9560439560439561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04395604395604396 }, { "epoch": 0.36962079250106517, "grad_norm": 172.2472924451879, "learning_rate": 3.989295975274429e-07, "loss": 0.4709, "step": 1735, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9015151515151515, "success_rate.epoch.env.math": 0.9564777327935222, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7650557620817844, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609531637009229, "success_rate.epoch.global": 0.8683602771362586, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974563953488372, "tokens_p.mean_in_band": 0.546875, "tokens_rate.above_band": 0.9756888168557536, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024311183144246355 }, { "epoch": 0.3706859821048147, "grad_norm": 134.34887301630633, "learning_rate": 3.9892173648899077e-07, "loss": 0.298, "step": 1740, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515151515151515, "success_rate.epoch.env.logic": 0.9017013232514177, "success_rate.epoch.env.math": 0.9555106167846309, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7660991857883049, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610336836174249, "success_rate.epoch.global": 0.8684513529073115, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978693181818182, "tokens_p.mean_in_band": 0.4921875, "tokens_rate.above_band": 0.99, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01 }, { "epoch": 0.37175117170856414, "grad_norm": 63.58531877240587, "learning_rate": 3.989138470846568e-07, "loss": 0.2935, "step": 1745, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9018867924528302, "success_rate.epoch.env.math": 0.9546827794561934, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.766789667896679, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610646101066852, "success_rate.epoch.global": 0.8685419058553386, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996234939759037, "tokens_p.mean_in_band": 0.6732584635416666, "tokens_rate.above_band": 0.991044776119403, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008955223880597015 }, { "epoch": 0.3728163613123136, "grad_norm": 76.26688553352443, "learning_rate": 3.989059293201907e-07, "loss": 0.4543, "step": 1750, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.9021739130434783, "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9020715630885122, "success_rate.epoch.env.math": 0.954954954954955, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.7665684830633285, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861086042952239, "success_rate.epoch.global": 0.8686319404693761, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9914772727272727, "tokens_p.mean_in_band": 0.6436631944444444, "tokens_rate.above_band": 0.9243697478991597, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07563025210084033 }, { "epoch": 0.37388155091606307, "grad_norm": 106.52773155292499, "learning_rate": 3.9889798320136297e-07, "loss": 0.261, "step": 1755, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.9032258064516129, "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9022556390977443, "success_rate.epoch.env.math": 0.9550898203592815, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7660044150110376, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602199733065579, "success_rate.epoch.global": 0.8681506849315068, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9985119047619048, "tokens_p.mean_in_band": 0.5394736842105263, "tokens_rate.above_band": 0.9784090909090909, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02159090909090909 }, { "epoch": 0.3749467405198125, "grad_norm": 141.56562190147378, "learning_rate": 3.988900087339645e-07, "loss": 0.1901, "step": 1760, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8936170212765957, "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9022556390977443, "success_rate.epoch.env.math": 0.9552683896620279, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7661290322580645, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859374009795172, "success_rate.epoch.global": 0.8679567444507683, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972014925373134, "tokens_p.mean_in_band": 0.52734375, "tokens_rate.above_band": 0.9710144927536232, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028985507246376812 }, { "epoch": 0.376011930123562, "grad_norm": 115.16287068471944, "learning_rate": 3.988820059238072e-07, "loss": 0.3068, "step": 1765, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9009345794392524, "success_rate.epoch.env.math": 0.9554013875123885, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7666422823701536, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8594144651708651, "success_rate.epoch.global": 0.8680476730987514, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956706549118388, "tokens_p.mean_in_band": 0.6513671875, "tokens_rate.above_band": 0.9900249376558603, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00997506234413965 }, { "epoch": 0.37707711972731145, "grad_norm": 207.01314214792083, "learning_rate": 3.988739747767234e-07, "loss": 0.3236, "step": 1770, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8958333333333334, "success_rate.epoch.env.agentgym:sciworld": 0.9557522123893806, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9011194029850746, "success_rate.epoch.env.math": 0.9554896142433235, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7645772594752187, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859387155001968, "success_rate.epoch.global": 0.8672891907187323, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.8400000000000001, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.99906191369606, "tokens_p.mean_in_band": 0.5969669117647058, "tokens_rate.above_band": 0.9690909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03090909090909091 }, { "epoch": 0.3781423093310609, "grad_norm": 79.1865554169076, "learning_rate": 3.988659152985661e-07, "loss": 0.3867, "step": 1775, "success_rate.epoch.env.abd": 0.9870967741935484, "success_rate.epoch.env.agentgym:alfworld": 0.8958333333333334, "success_rate.epoch.env.agentgym:sciworld": 0.9557522123893806, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9014869888475836, "success_rate.epoch.env.math": 0.9555774925962488, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7645348837209303, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8594323255148505, "success_rate.epoch.global": 0.8673814898419865, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972587719298246, "tokens_p.mean_in_band": 0.5085227272727273, "tokens_rate.above_band": 0.9810671256454389, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0189328743545611 }, { "epoch": 0.37920749893481037, "grad_norm": 56.81154084391707, "learning_rate": 3.9885782749520904e-07, "loss": 0.4482, "step": 1780, "success_rate.epoch.env.abd": 0.9870967741935484, "success_rate.epoch.env.agentgym:alfworld": 0.8958333333333334, "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9520958083832335, "success_rate.epoch.env.logic": 0.9018518518518519, "success_rate.epoch.env.math": 0.9556650246305419, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7650471356055112, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8589356074894532, "success_rate.epoch.global": 0.8674732695554305, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999683887283237, "tokens_p.mean_in_band": 0.2431640625, "tokens_rate.above_band": 0.9985569985569985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001443001443001443 }, { "epoch": 0.3802726885385599, "grad_norm": 94.01123671164818, "learning_rate": 3.988497113725466e-07, "loss": 0.4395, "step": 1785, "success_rate.epoch.env.abd": 0.9871794871794872, "success_rate.epoch.env.agentgym:alfworld": 0.8969072164948454, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9018518518518519, "success_rate.epoch.env.math": 0.9557086614173228, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7651734104046243, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859123727274449, "success_rate.epoch.global": 0.867564534231201, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9666666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987687155240347, "tokens_p.mean_in_band": 0.7047293526785714, "tokens_rate.above_band": 0.9890880748246298, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010911925175370226 }, { "epoch": 0.38133787814230935, "grad_norm": 261.2873970539276, "learning_rate": 3.9884156693649366e-07, "loss": 0.2541, "step": 1790, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8969072164948454, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9025735294117647, "success_rate.epoch.env.math": 0.9557522123893806, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7644092219020173, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8591312453968757, "success_rate.epoch.global": 0.8673754896474538, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9905973451327433, "tokens_p.mean_in_band": 0.5203993055555556, "tokens_rate.above_band": 0.9262295081967213, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07377049180327869 }, { "epoch": 0.3824030677460588, "grad_norm": 128.50981424547092, "learning_rate": 3.988333941929858e-07, "loss": 0.2765, "step": 1795, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9012797074954296, "success_rate.epoch.env.math": 0.9558390578999019, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7634795111430626, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8591263358814469, "success_rate.epoch.global": 0.8669084821428571, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0002307219031994, "tokens_p.mean_in_band": 0.5099734042553191, "tokens_rate.above_band": 0.9628751974723538, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03712480252764613 }, { "epoch": 0.3834682573498083, "grad_norm": 101.51869721822652, "learning_rate": 3.9882519314797937e-07, "loss": 0.7223, "step": 1800, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9012797074954296, "success_rate.epoch.env.math": 0.9549902152641878, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7644953471725126, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8592813772389193, "success_rate.epoch.global": 0.8670005564830273, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0005695625759417, "tokens_p.mean_in_band": 0.72265625, "tokens_rate.above_band": 0.9927623642943305, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007237635705669481 }, { "epoch": 0.38453344695355773, "grad_norm": 238.39867050449627, "learning_rate": 3.9881696380745117e-07, "loss": 0.4797, "step": 1805, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8910891089108911, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9014598540145985, "success_rate.epoch.env.math": 0.9551656920077972, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.765, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8586413305126701, "success_rate.epoch.global": 0.8670921198668147, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977609034267912, "tokens_p.mean_in_band": 0.556640625, "tokens_rate.above_band": 0.9907407407407407, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009259259259259259 }, { "epoch": 0.3855986365573072, "grad_norm": 165.70785372117808, "learning_rate": 3.988087061773987e-07, "loss": 0.5067, "step": 1810, "success_rate.epoch.env.abd": 0.9810126582278481, "success_rate.epoch.env.agentgym:alfworld": 0.8921568627450981, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9552529182879378, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7649572649572649, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8580416853115835, "success_rate.epoch.global": 0.8666297731045932, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9940807799442897, "tokens_p.mean_below_band": 4.805624485015869e-07, "tokens_p.mean_in_band": 0.14323846726190476, "tokens_rate.above_band": 0.45966709346991036, "tokens_rate.below_band": 0.002560819462227913, "tokens_rate.in_band": 0.5377720870678617 }, { "epoch": 0.38666382616105666, "grad_norm": 19.52626271575069, "learning_rate": 3.9880042026384e-07, "loss": 0.1823, "step": 1815, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8932038834951457, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9553831231813773, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7647476901208244, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8583108984559328, "success_rate.epoch.global": 0.8667218543046358, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987074209245742, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9879807692307693, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01201923076923077 }, { "epoch": 0.3877290157648061, "grad_norm": 111.96252552555268, "learning_rate": 3.987921060728139e-07, "loss": 0.313, "step": 1820, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8942307692307693, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9001814882032668, "success_rate.epoch.env.math": 0.9555984555984556, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7652482269503547, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8584858296546144, "success_rate.epoch.global": 0.8670886075949367, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956837016574586, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9836956521739131, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016304347826086956 }, { "epoch": 0.3887942053685556, "grad_norm": 32.72777812688531, "learning_rate": 3.987837636103797e-07, "loss": 0.5374, "step": 1825, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9007220216606499, "success_rate.epoch.env.math": 0.9556412729026037, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7644978783592645, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8585878385355513, "success_rate.epoch.global": 0.8669045005488474, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980614143920595, "tokens_p.mean_in_band": 0.50390625, "tokens_rate.above_band": 0.9664268585131894, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03357314148681055 }, { "epoch": 0.3898593949723051, "grad_norm": 226.50015530548728, "learning_rate": 3.987753928826172e-07, "loss": 0.3713, "step": 1830, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9010791366906474, "success_rate.epoch.env.math": 0.9557266602502407, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7654929577464789, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8587185277869014, "success_rate.epoch.global": 0.8672687465790914, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9909336419753086, "tokens_p.mean_in_band": 0.7723214285714286, "tokens_rate.above_band": 0.9585798816568047, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04142011834319527 }, { "epoch": 0.39092458457605456, "grad_norm": 155.28369210290896, "learning_rate": 3.987669938956271e-07, "loss": 0.4369, "step": 1835, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9012567324955116, "success_rate.epoch.env.math": 0.9558541266794626, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7661516853932584, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8588716487966237, "success_rate.epoch.global": 0.8676310043668122, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961231203007519, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9925373134328358, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007462686567164179 }, { "epoch": 0.391989774179804, "grad_norm": 115.30390205213794, "learning_rate": 3.9875856665553033e-07, "loss": 0.2586, "step": 1840, "success_rate.epoch.env.abd": 0.98125, "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9017857142857143, "success_rate.epoch.env.math": 0.9558964525407478, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7659425367904695, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8589798192539283, "success_rate.epoch.global": 0.8677191072400653, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996875, "tokens_p.mean_in_band": 0.72109375, "tokens_rate.above_band": 0.9811320754716981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018867924528301886 }, { "epoch": 0.3930549637835535, "grad_norm": 206.94307274182046, "learning_rate": 3.987501111684688e-07, "loss": 0.4904, "step": 1845, "success_rate.epoch.env.abd": 0.98125, "success_rate.epoch.env.agentgym:alfworld": 0.8952380952380953, "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9019607843137255, "success_rate.epoch.env.math": 0.9559808612440192, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7656903765690377, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8589804845730057, "success_rate.epoch.global": 0.8675352877307275, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9902083333333334, "tokens_p.mean_in_band": 0.623046875, "tokens_rate.above_band": 0.9375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0625 }, { "epoch": 0.39412015338730294, "grad_norm": 76.81925583778981, "learning_rate": 3.987416274406047e-07, "loss": 0.3629, "step": 1850, "success_rate.epoch.env.abd": 0.9813664596273292, "success_rate.epoch.env.agentgym:alfworld": 0.8962264150943396, "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9024822695035462, "success_rate.epoch.env.math": 0.9561068702290076, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7658536585365854, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8591793530684554, "success_rate.epoch.global": 0.8678938819707634, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993181818181818, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.3951853429910524, "grad_norm": 116.64443385467045, "learning_rate": 3.987331154781209e-07, "loss": 0.3234, "step": 1855, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.8962264150943396, "success_rate.epoch.env.agentgym:sciworld": 0.9495798319327731, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9028268551236749, "success_rate.epoch.env.math": 0.956232159847764, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7663421418636995, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8593157776120957, "success_rate.epoch.global": 0.8682505399568035, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9944444444444445, "tokens_p.mean_in_band": 0.7317708333333334, "tokens_rate.above_band": 0.9836065573770492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01639344262295082 }, { "epoch": 0.39625053259480186, "grad_norm": 75.61262444922384, "learning_rate": 3.987245752872209e-07, "loss": 0.2077, "step": 1860, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9028268551236749, "success_rate.epoch.env.math": 0.9563567362428842, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7661346287300486, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8594966098300592, "success_rate.epoch.global": 0.8683360258481422, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993592271293376, "tokens_p.mean_in_band": 0.611083984375, "tokens_rate.above_band": 0.9875389408099688, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012461059190031152 }, { "epoch": 0.3973157221985513, "grad_norm": 100.09982518001627, "learning_rate": 3.987160068741287e-07, "loss": 0.2537, "step": 1865, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9035087719298246, "success_rate.epoch.env.math": 0.9563981042654028, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7669432918395575, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859635877824439, "success_rate.epoch.global": 0.8686895810955961, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967261904761905, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9929078014184397, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0070921985815602835 }, { "epoch": 0.3983809118023008, "grad_norm": 78.34741176743526, "learning_rate": 3.987074102450889e-07, "loss": 0.2116, "step": 1870, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9040139616055847, "success_rate.epoch.env.math": 0.956480605487228, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.7672651933701657, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859763258832812, "success_rate.epoch.global": 0.8690412426352437, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9945195895522388, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.3994461014060503, "grad_norm": 101.8501578816676, "learning_rate": 3.986987854063667e-07, "loss": 0.4776, "step": 1875, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9043478260869565, "success_rate.epoch.env.math": 0.9565627950897073, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7673778389538886, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8595082921359247, "success_rate.epoch.global": 0.8688568376068376, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9922326589595376, "tokens_p.mean_in_band": 0.5869565217391305, "tokens_rate.above_band": 0.8826530612244898, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11734693877551021 }, { "epoch": 0.40051129100979976, "grad_norm": 92.16111321999547, "learning_rate": 3.9869013236424776e-07, "loss": 0.3388, "step": 1880, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9045138888888888, "success_rate.epoch.env.math": 0.9566037735849057, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7673301304049417, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8596435728488809, "success_rate.epoch.global": 0.8689397975492807, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979385018495684, "tokens_p.mean_in_band": 0.6731770833333334, "tokens_rate.above_band": 0.9963144963144963, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0036855036855036856 }, { "epoch": 0.4015764806135492, "grad_norm": 75.80936004452577, "learning_rate": 3.986814511250384e-07, "loss": 0.3434, "step": 1885, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9045138888888888, "success_rate.epoch.env.math": 0.9566446748350612, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.76775956284153, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8597728655900307, "success_rate.epoch.global": 0.8690223166843783, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965909090909091, "tokens_p.mean_in_band": 0.53466796875, "tokens_rate.above_band": 0.9649122807017544, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03508771929824561 }, { "epoch": 0.4026416702172987, "grad_norm": 207.99906922213967, "learning_rate": 3.986727416950655e-07, "loss": 0.3187, "step": 1890, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9048442906574394, "success_rate.epoch.env.math": 0.9559099437148217, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7680763983628922, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8597882575455966, "success_rate.epoch.global": 0.8691043985161632, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973730297723292, "tokens_p.mean_in_band": 0.6372767857142857, "tokens_rate.above_band": 0.9878892733564014, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012110726643598616 }, { "epoch": 0.40370685982104815, "grad_norm": 133.45399617049515, "learning_rate": 3.9866400408067625e-07, "loss": 0.2572, "step": 1895, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9051724137931034, "success_rate.epoch.env.math": 0.9560747663551402, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7680272108843538, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8598285992090006, "success_rate.epoch.global": 0.8691860465116279, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_below_band": 1.3597309589385986e-07, "tokens_p.mean_in_band": 0.7118055555555556, "tokens_rate.above_band": 0.9212598425196851, "tokens_rate.below_band": 0.007874015748031496, "tokens_rate.in_band": 0.07086614173228346 }, { "epoch": 0.4047720494247976, "grad_norm": 37.14186426230828, "learning_rate": 3.9865523828823873e-07, "loss": 0.403, "step": 1900, "success_rate.epoch.env.abd": 0.9819277108433735, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.952, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9051724137931034, "success_rate.epoch.env.math": 0.9562383612662942, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7676630434782609, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8599013545799225, "success_rate.epoch.global": 0.8692672641012125, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966877880184332, "tokens_p.mean_in_band": 0.51171875, "tokens_rate.above_band": 0.9908675799086758, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0091324200913242 }, { "epoch": 0.40583723902854707, "grad_norm": 286.06375095639777, "learning_rate": 3.9864644432414135e-07, "loss": 0.3238, "step": 1905, "success_rate.epoch.env.abd": 0.9820359281437125, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.952, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9051724137931034, "success_rate.epoch.env.math": 0.9564007421150278, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7664184157075152, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8598128064324978, "success_rate.epoch.global": 0.8688222923238696, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.7999999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9956808943089431, "tokens_p.mean_below_band": 1.7415732145309448e-07, "tokens_p.mean_in_band": 0.5652901785714286, "tokens_rate.above_band": 0.8913043478260869, "tokens_rate.below_band": 0.007246376811594203, "tokens_rate.in_band": 0.10144927536231885 }, { "epoch": 0.40690242863229653, "grad_norm": 96.81548796544416, "learning_rate": 3.9863762219479304e-07, "loss": 0.1935, "step": 1910, "success_rate.epoch.env.abd": 0.9820359281437125, "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, "success_rate.epoch.env.agentgym:sciworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9051724137931034, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7672064777327935, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8600150275307551, "success_rate.epoch.global": 0.8691662296801258, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991776315789473, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9973753280839895, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0026246719160104987 }, { "epoch": 0.407967618236046, "grad_norm": 346.6933265257248, "learning_rate": 3.9862877190662336e-07, "loss": 0.4523, "step": 1915, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, "success_rate.epoch.env.agentgym:sciworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9053356282271945, "success_rate.epoch.env.math": 0.9566820276497696, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7671601615074024, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8600499471423166, "success_rate.epoch.global": 0.8692468619246861, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956521739130435, "tokens_p.mean_in_band": 0.5880681818181818, "tokens_rate.above_band": 0.9126984126984127, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0873015873015873 }, { "epoch": 0.4090328078397955, "grad_norm": 295.83045843893126, "learning_rate": 3.9861989346608225e-07, "loss": 0.4063, "step": 1920, "success_rate.epoch.env.abd": 0.9822485207100592, "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9053356282271945, "success_rate.epoch.env.math": 0.9568411386593205, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7674731182795699, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860203142693316, "success_rate.epoch.global": 0.8695878977569118, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0004023913543343, "tokens_p.mean_in_band": 0.7890625, "tokens_rate.above_band": 0.9993106617647058, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0006893382352941177 }, { "epoch": 0.41009799744354497, "grad_norm": 50.90770892944702, "learning_rate": 3.9861098687964035e-07, "loss": 0.2016, "step": 1925, "success_rate.epoch.env.abd": 0.9822485207100592, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9056603773584906, "success_rate.epoch.env.math": 0.9569202566452796, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.7680965147453083, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604030167536731, "success_rate.epoch.global": 0.8699271592091571, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994131455399061, "tokens_p.mean_in_band": 0.8333333333333334, "tokens_rate.above_band": 0.993006993006993, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006993006993006993 }, { "epoch": 0.41116318704729443, "grad_norm": 42.168066538962435, "learning_rate": 3.986020521537887e-07, "loss": 0.2325, "step": 1930, "success_rate.epoch.env.abd": 0.9822485207100592, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.905982905982906, "success_rate.epoch.env.math": 0.956959706959707, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.768251841929002, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8606600925458516, "success_rate.epoch.global": 0.8702646600934094, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995320720356131, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.4122283766510439, "grad_norm": 145.53337290937844, "learning_rate": 3.985930892950388e-07, "loss": 0.2996, "step": 1935, "success_rate.epoch.env.abd": 0.9823529411764705, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9063032367972743, "success_rate.epoch.env.math": 0.9570383912248629, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.768561872909699, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604868493824734, "success_rate.epoch.global": 0.8703416149068323, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954268292682927, "tokens_p.mean_in_band": 0.5817307692307693, "tokens_rate.above_band": 0.9403669724770642, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05963302752293578 }, { "epoch": 0.41329356625479335, "grad_norm": 21.038500967777555, "learning_rate": 3.985840983099228e-07, "loss": 0.2493, "step": 1940, "success_rate.epoch.env.abd": 0.9824561403508771, "success_rate.epoch.env.agentgym:alfworld": 0.9009009009009009, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9049235993208828, "success_rate.epoch.env.math": 0.9571948998178507, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7688710754843019, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604950469976104, "success_rate.epoch.global": 0.8704181724315952, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975743447580645, "tokens_p.mean_in_band": 0.5204190340909091, "tokens_rate.above_band": 0.9783037475345168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021696252465483234 }, { "epoch": 0.4143587558585428, "grad_norm": 178.0250680459173, "learning_rate": 3.9857507920499315e-07, "loss": 0.2773, "step": 1945, "success_rate.epoch.env.abd": 0.9826589595375722, "success_rate.epoch.env.agentgym:alfworld": 0.9009009009009009, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9054054054054054, "success_rate.epoch.env.math": 0.9572727272727273, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.768, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604851722014097, "success_rate.epoch.global": 0.870236869207003, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9960526315789474, "tokens_p.mean_in_band": 0.44091796875, "tokens_rate.above_band": 0.9223300970873787, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07766990291262135 }, { "epoch": 0.4154239454622923, "grad_norm": 137.5432836863038, "learning_rate": 3.9856603198682303e-07, "loss": 0.3469, "step": 1950, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.9009009009009009, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9562841530054644, "success_rate.epoch.env.logic": 0.9054054054054054, "success_rate.epoch.env.math": 0.957427536231884, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7672872340425532, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604653450300734, "success_rate.epoch.global": 0.8700564971751412, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958465189873418, "tokens_p.mean_in_band": 0.4979440789473684, "tokens_rate.above_band": 0.9432835820895522, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056716417910447764 }, { "epoch": 0.41648913506604174, "grad_norm": 28.793385428920907, "learning_rate": 3.9855695666200597e-07, "loss": 0.2335, "step": 1955, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.9009009009009009, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9054054054054054, "success_rate.epoch.env.math": 0.9575045207956601, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7670869276708693, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605236640038331, "success_rate.epoch.global": 0.8701331967213115, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975110619469026, "tokens_p.mean_in_band": 0.64375, "tokens_rate.above_band": 0.9826086956521739, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017391304347826087 }, { "epoch": 0.4175543246697912, "grad_norm": 88.79062517186233, "learning_rate": 3.9854785323715596e-07, "loss": 0.3732, "step": 1960, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9040404040404041, "success_rate.epoch.env.math": 0.957542908762421, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7671957671957672, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605254239081749, "success_rate.epoch.global": 0.8699540112416965, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.86, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9991261185682326, "tokens_p.mean_in_band": 0.5973307291666666, "tokens_rate.above_band": 0.9612903225806452, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03870967741935484 }, { "epoch": 0.4186195142735407, "grad_norm": 52.39104396987938, "learning_rate": 3.985387217189075e-07, "loss": 0.2466, "step": 1965, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9046822742474916, "success_rate.epoch.env.math": 0.9566787003610109, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7678100263852242, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605610530895508, "success_rate.epoch.global": 0.8700305810397554, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977973568281938, "tokens_p.mean_in_band": 0.4231770833333333, "tokens_rate.above_band": 0.9934354485776805, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006564551422319475 }, { "epoch": 0.4196847038772902, "grad_norm": 65.68878592882285, "learning_rate": 3.985295621139156e-07, "loss": 0.2321, "step": 1970, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518716577540107, "success_rate.epoch.env.logic": 0.9048414023372288, "success_rate.epoch.env.math": 0.9567567567567568, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7685733070348455, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602079045561287, "success_rate.epoch.global": 0.8701067615658363, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995149534629072, "tokens_p.mean_below_band": 1.525040715932846e-08, "tokens_p.mean_in_band": 0.5329491725768322, "tokens_rate.above_band": 0.8960019622271278, "tokens_rate.below_band": 0.0002452783909737552, "tokens_rate.in_band": 0.10375275938189846 }, { "epoch": 0.42074989348103964, "grad_norm": 102.3532420465388, "learning_rate": 3.985203744288558e-07, "loss": 0.2261, "step": 1975, "success_rate.epoch.env.abd": 0.9831460674157303, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518716577540107, "success_rate.epoch.env.logic": 0.9051580698835274, "success_rate.epoch.env.math": 0.95695067264574, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7688772160210111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602906056815427, "success_rate.epoch.global": 0.8704361054766734, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9893973214285714, "tokens_p.mean_in_band": 0.751953125, "tokens_rate.above_band": 0.9824561403508771, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017543859649122806 }, { "epoch": 0.4218150830847891, "grad_norm": 433.8337714904573, "learning_rate": 3.985111586704238e-07, "loss": 0.3813, "step": 1980, "success_rate.epoch.env.abd": 0.9831460674157303, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.9051580698835274, "success_rate.epoch.env.math": 0.9571810883140054, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7680209698558322, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602569848927737, "success_rate.epoch.global": 0.870257966616085, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976154618473896, "tokens_p.mean_in_band": 0.4341517857142857, "tokens_rate.above_band": 0.97265625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02734375 }, { "epoch": 0.42288027268853856, "grad_norm": 75.35711064317096, "learning_rate": 3.9850191484533616e-07, "loss": 0.296, "step": 1985, "success_rate.epoch.env.abd": 0.9832402234636871, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9051580698835274, "success_rate.epoch.env.math": 0.9572953736654805, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.768125408229915, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603084551273191, "success_rate.epoch.global": 0.8703329969727548, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938350340136054, "tokens_p.mean_in_band": 0.65078125, "tokens_rate.above_band": 0.9671052631578947, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03289473684210526 }, { "epoch": 0.423945462292288, "grad_norm": 105.79826561046573, "learning_rate": 3.9849264296032966e-07, "loss": 0.3038, "step": 1990, "success_rate.epoch.env.abd": 0.9832402234636871, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9039735099337748, "success_rate.epoch.env.math": 0.9573712255772646, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7679269882659713, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860243953293457, "success_rate.epoch.global": 0.8701560140915954, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973378112712975, "tokens_p.mean_in_band": 0.55921875, "tokens_rate.above_band": 0.9838813668600903, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016118633139909737 }, { "epoch": 0.4250106518960375, "grad_norm": 583.0877240908128, "learning_rate": 3.984833430221616e-07, "loss": 0.4396, "step": 1995, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9041322314049587, "success_rate.epoch.env.math": 0.9574090505767524, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7683322517845554, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603071278260844, "success_rate.epoch.global": 0.8702309236947792, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949596774193549, "tokens_p.mean_in_band": 0.6495361328125, "tokens_rate.above_band": 0.9323308270676691, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06766917293233082 }, { "epoch": 0.42607584149978694, "grad_norm": 226.80312272086525, "learning_rate": 3.984740150376097e-07, "loss": 0.4215, "step": 2000, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9044481054365733, "success_rate.epoch.env.math": 0.9575596816976127, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7689320388349514, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604040634808908, "success_rate.epoch.global": 0.870555833750626, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9940308988764045, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9888888888888889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011111111111111112 }, { "epoch": 0.4271410311035364, "grad_norm": 85.21569872481132, "learning_rate": 3.984646590134721e-07, "loss": 0.3414, "step": 2005, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9528795811518325, "success_rate.epoch.env.logic": 0.9047619047619048, "success_rate.epoch.env.math": 0.9577092511013215, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7680878552971576, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603919896077737, "success_rate.epoch.global": 0.8703796203796204, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957356076759062, "tokens_p.mean_in_band": 0.5249467329545454, "tokens_rate.above_band": 0.9770833333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022916666666666665 }, { "epoch": 0.4282062207072859, "grad_norm": 39.98575523225476, "learning_rate": 3.9845527495656743e-07, "loss": 0.2913, "step": 2010, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9017857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9052287581699346, "success_rate.epoch.env.math": 0.9577464788732394, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7680412371134021, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605678449657698, "success_rate.epoch.global": 0.8704534130543099, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972998271889401, "tokens_p.mean_in_band": 0.6315104166666666, "tokens_rate.above_band": 0.9863636363636363, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013636363636363636 }, { "epoch": 0.4292714103110354, "grad_norm": 136.67788041878998, "learning_rate": 3.9844586287373476e-07, "loss": 0.2097, "step": 2015, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9052287581699346, "success_rate.epoch.env.math": 0.957931638913234, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7686375321336761, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8607179002413325, "success_rate.epoch.global": 0.8707753479125249, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965861344537815, "tokens_p.mean_in_band": 0.7521701388888888, "tokens_rate.above_band": 0.9814432989690721, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018556701030927835 }, { "epoch": 0.43033659991478485, "grad_norm": 45.39873717022019, "learning_rate": 3.984364227718334e-07, "loss": 0.3669, "step": 2020, "success_rate.epoch.env.abd": 0.9834254143646409, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9533678756476683, "success_rate.epoch.env.logic": 0.9055374592833876, "success_rate.epoch.env.math": 0.958041958041958, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7689345314505777, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860813443535701, "success_rate.epoch.global": 0.8710956866633615, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992419137466307, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.9973118279569892, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002688172043010753 }, { "epoch": 0.4314017895185343, "grad_norm": 76.85691562348057, "learning_rate": 3.984269546577434e-07, "loss": 0.5235, "step": 2025, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9533678756476683, "success_rate.epoch.env.logic": 0.9056910569105691, "success_rate.epoch.env.math": 0.958151700087184, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7676056338028169, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8607330421754333, "success_rate.epoch.global": 0.8706726013847675, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9949664429530202, "tokens_p.mean_in_band": 0.6236298301003196, "tokens_rate.above_band": 0.93125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06875 }, { "epoch": 0.43246697912228377, "grad_norm": 69.19712439485015, "learning_rate": 3.98417458538365e-07, "loss": 0.3394, "step": 2030, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9059967585089141, "success_rate.epoch.env.math": 0.9582608695652174, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7679028132992327, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8608626610461776, "success_rate.epoch.global": 0.8709916132215096, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979368932038835, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9995147986414362, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00048520135856380397 }, { "epoch": 0.43353216872603323, "grad_norm": 229.1415625937778, "learning_rate": 3.984079344206189e-07, "loss": 0.337, "step": 2035, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9063004846526656, "success_rate.epoch.env.math": 0.9582971329278888, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7683004455760662, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609297175719282, "success_rate.epoch.global": 0.8710629921259843, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982, "tokens_p.mean_in_band": 0.7639508928571429, "tokens_rate.above_band": 0.9889240506329114, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011075949367088608 }, { "epoch": 0.4345973583297827, "grad_norm": 47.878039828477796, "learning_rate": 3.983983823114462e-07, "loss": 0.1873, "step": 2040, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9066022544283414, "success_rate.epoch.env.math": 0.9584775086505191, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7679593134138589, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860973606375179, "success_rate.epoch.global": 0.8711340206185567, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962993421052632, "tokens_p.mean_in_band": 0.453125, "tokens_rate.above_band": 0.9743589743589743, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02564102564102564 }, { "epoch": 0.43566254793353215, "grad_norm": 734.933175739297, "learning_rate": 3.983888022178084e-07, "loss": 0.4017, "step": 2045, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9066022544283414, "success_rate.epoch.env.math": 0.9585849870578085, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7683544303797468, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610192968636496, "success_rate.epoch.global": 0.8712047012732616, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895833333333334, "tokens_p.mean_in_band": 0.587890625, "tokens_rate.above_band": 0.9310344827586207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06896551724137931 }, { "epoch": 0.4367277375372816, "grad_norm": 62.88577151831177, "learning_rate": 3.983791941466874e-07, "loss": 0.495, "step": 2050, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.9067524115755627, "success_rate.epoch.env.math": 0.9577950043066322, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7692307692307693, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610619879005758, "success_rate.epoch.global": 0.8712750366389839, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964622641509434, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9754601226993865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024539877300613498 }, { "epoch": 0.43779292714103113, "grad_norm": 26.450002652779286, "learning_rate": 3.983695581050855e-07, "loss": 0.2639, "step": 2055, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.9072, "success_rate.epoch.env.math": 0.9570077386070507, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.769811320754717, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611144912460944, "success_rate.epoch.global": 0.8713450292397661, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964953271028038, "tokens_p.mean_in_band": 0.6625, "tokens_rate.above_band": 0.9553571428571429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044642857142857144 }, { "epoch": 0.4388581167447806, "grad_norm": 91.69596708735087, "learning_rate": 3.983598941000254e-07, "loss": 0.6407, "step": 2060, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.9059011164274322, "success_rate.epoch.env.math": 0.9571550985432733, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7697616060225847, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610052877580511, "success_rate.epoch.global": 0.8711716091395236, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9921875, "tokens_p.mean_in_band": 0.6252604166666667, "tokens_rate.above_band": 0.9142857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08571428571428572 }, { "epoch": 0.43992330634853005, "grad_norm": 91.43104965971058, "learning_rate": 3.983502021385502e-07, "loss": 0.2632, "step": 2065, "success_rate.epoch.env.abd": 0.9836956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.9026548672566371, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9060509554140127, "success_rate.epoch.env.math": 0.9563356164383562, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7698561601000625, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610865754650177, "success_rate.epoch.global": 0.8709990300678953, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7999999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983701814058957, "tokens_p.mean_in_band": 0.4559659090909091, "tokens_rate.above_band": 0.9756637168141593, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024336283185840708 }, { "epoch": 0.4409884959522795, "grad_norm": 265.90081511867646, "learning_rate": 3.983404822277232e-07, "loss": 0.3087, "step": 2070, "success_rate.epoch.env.abd": 0.9837837837837838, "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9049128367670365, "success_rate.epoch.env.math": 0.9563729683490163, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7704304429195259, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603269047869854, "success_rate.epoch.global": 0.8708272859216255, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992339121552605, "tokens_p.mean_in_band": 0.5140086206896551, "tokens_rate.above_band": 0.9712301587301587, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028769841269841268 }, { "epoch": 0.442053685556029, "grad_norm": 272.6197297841009, "learning_rate": 3.983307343746283e-07, "loss": 0.3549, "step": 2075, "success_rate.epoch.env.abd": 0.9837837837837838, "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9034810126582279, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7703795892968264, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602264055007297, "success_rate.epoch.global": 0.8706563706563707, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9996594551282051, "tokens_p.mean_in_band": 0.48061342592592593, "tokens_rate.above_band": 0.9829867674858223, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017013232514177693 }, { "epoch": 0.44311887515977844, "grad_norm": 136.09696670608895, "learning_rate": 3.983209585863696e-07, "loss": 0.2975, "step": 2080, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9036334913112164, "success_rate.epoch.env.math": 0.9565587734241908, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7713754646840149, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603420938909193, "success_rate.epoch.global": 0.8709677419354839, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9921085858585859, "tokens_p.mean_in_band": 0.677734375, "tokens_rate.above_band": 0.9611650485436893, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038834951456310676 }, { "epoch": 0.4441840647635279, "grad_norm": 307.72171262899406, "learning_rate": 3.983111548700717e-07, "loss": 0.3543, "step": 2085, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8879310344827587, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9037854889589906, "success_rate.epoch.env.math": 0.9566694987255735, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7716584158415841, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8599008948236437, "success_rate.epoch.global": 0.8710374639769453, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987739755884917, "tokens_p.mean_in_band": 0.7448381696428571, "tokens_rate.above_band": 0.9879414298018949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012058570198105082 }, { "epoch": 0.44524925436727736, "grad_norm": 253.67080269735254, "learning_rate": 3.983013232328794e-07, "loss": 0.4017, "step": 2090, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9039370078740158, "success_rate.epoch.env.math": 0.9567796610169491, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7723627390499691, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8600757910800905, "success_rate.epoch.global": 0.8713464302827024, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923349056603774, "tokens_p.mean_in_band": 0.7408854166666666, "tokens_rate.above_band": 0.9814814814814815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018518518518518517 }, { "epoch": 0.4463144439710268, "grad_norm": 27.330875885209775, "learning_rate": 3.98291463681958e-07, "loss": 0.3031, "step": 2095, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9558823529411765, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.955, "success_rate.epoch.env.logic": 0.9039370078740158, "success_rate.epoch.env.math": 0.956081081081081, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7716923076923077, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860001601693305, "success_rate.epoch.global": 0.8709369024856597, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973404255319149, "tokens_p.mean_in_band": 0.5490056818181818, "tokens_rate.above_band": 0.914396887159533, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08560311284046693 }, { "epoch": 0.44737963357477634, "grad_norm": 45.25101740398943, "learning_rate": 3.9828157622449305e-07, "loss": 0.255, "step": 2100, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9565217391304348, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9040880503144654, "success_rate.epoch.env.math": 0.9561551433389545, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7721130221130221, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8601387913210604, "success_rate.epoch.global": 0.871244635193133, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9950153374233128, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9939024390243902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006097560975609756 }, { "epoch": 0.4484448231785258, "grad_norm": 151.39764527816436, "learning_rate": 3.9827166086769046e-07, "loss": 0.4648, "step": 2105, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9045383411580594, "success_rate.epoch.env.math": 0.9562289562289562, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7723926380368098, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602761630646246, "success_rate.epoch.global": 0.8715509039010466, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988046448087432, "tokens_p.mean_in_band": 0.8802083333333334, "tokens_rate.above_band": 0.991869918699187, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008130081300813009 }, { "epoch": 0.44951001278227526, "grad_norm": 43.74072739264825, "learning_rate": 3.9826171761877647e-07, "loss": 0.391, "step": 2110, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9046875, "success_rate.epoch.env.math": 0.9563758389261745, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7711138310893513, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602784629835182, "success_rate.epoch.global": 0.8711438063597532, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9947429906542056, "tokens_p.mean_in_band": 0.32502297794117646, "tokens_rate.above_band": 0.9264069264069265, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0735930735930736 }, { "epoch": 0.4505752023860247, "grad_norm": 38.731010504410676, "learning_rate": 3.9825174648499756e-07, "loss": 0.3777, "step": 2115, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.8898305084745762, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9048361934477379, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7709224190592547, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604010768540065, "success_rate.epoch.global": 0.8712121212121212, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986066878980892, "tokens_p.mean_in_band": 0.3430989583333333, "tokens_rate.above_band": 0.9751552795031055, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024844720496894408 }, { "epoch": 0.4516403919897742, "grad_norm": 145.91802969323246, "learning_rate": 3.9824174747362073e-07, "loss": 0.2371, "step": 2120, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8898305084745762, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9051321928460342, "success_rate.epoch.env.math": 0.9565580618212197, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7714808043875686, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605176346949593, "success_rate.epoch.global": 0.8715162966461975, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997362012987013, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9871794871794872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01282051282051282 }, { "epoch": 0.45270558159352364, "grad_norm": 114.67775463415884, "learning_rate": 3.982317205919332e-07, "loss": 0.3487, "step": 2125, "success_rate.epoch.env.abd": 0.9842105263157894, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9051321928460342, "success_rate.epoch.env.math": 0.9567027477102414, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7720364741641338, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8599091192909853, "success_rate.epoch.global": 0.8715834118755891, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979423868312757, "tokens_p.mean_in_band": 0.62890625, "tokens_rate.above_band": 0.9604743083003953, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039525691699604744 }, { "epoch": 0.4537707711972731, "grad_norm": 82.41558198000548, "learning_rate": 3.982216658472424e-07, "loss": 0.4364, "step": 2130, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9055727554179567, "success_rate.epoch.env.math": 0.9567027477102414, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7721212121212121, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859971826037641, "success_rate.epoch.global": 0.8716502115655853, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984375, "tokens_p.mean_in_band": 0.4053819444444444, "tokens_rate.above_band": 0.9302325581395349, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06976744186046512 }, { "epoch": 0.45483596080102257, "grad_norm": 60.606817030987585, "learning_rate": 3.982115832468762e-07, "loss": 0.3282, "step": 2135, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9057187017001546, "success_rate.epoch.env.math": 0.9568464730290457, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7726723095525998, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8601373861457549, "success_rate.epoch.global": 0.8719512195121951, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985250737463127, "tokens_p.mean_in_band": 0.6146763392857143, "tokens_rate.above_band": 0.9797687861271677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02023121387283237 }, { "epoch": 0.455901150404772, "grad_norm": 184.22197601349504, "learning_rate": 3.982014727981827e-07, "loss": 0.3728, "step": 2140, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9556650246305419, "success_rate.epoch.env.logic": 0.906441717791411, "success_rate.epoch.env.math": 0.956089478044739, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7728096676737161, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602528251458569, "success_rate.epoch.global": 0.8720168460458587, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994424168694241, "tokens_p.mean_in_band": 0.49951171875, "tokens_rate.above_band": 0.9983805668016195, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016194331983805667 }, { "epoch": 0.45696634000852154, "grad_norm": 58.82473441628496, "learning_rate": 3.9819133450853043e-07, "loss": 0.377, "step": 2145, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9065849923430321, "success_rate.epoch.env.math": 0.9561258278145696, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7734939759036145, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8599487541095314, "success_rate.epoch.global": 0.8720821661998133, "success_rate.window.env.ded": 0.6666666666666666, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957092377587077, "tokens_p.mean_in_band": 0.5876865671641791, "tokens_rate.above_band": 0.9078826764436297, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0921173235563703 }, { "epoch": 0.458031529612271, "grad_norm": 96.23351301220453, "learning_rate": 3.981811683853079e-07, "loss": 0.4288, "step": 2150, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9067278287461774, "success_rate.epoch.env.math": 0.9561621174524401, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7732453509298141, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8599696787849692, "success_rate.epoch.global": 0.871914299021891, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954879679144385, "tokens_p.mean_in_band": 0.70265625, "tokens_rate.above_band": 0.8820754716981132, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1179245283018868 }, { "epoch": 0.45909671921602047, "grad_norm": 31.926622437433984, "learning_rate": 3.9817097443592435e-07, "loss": 0.3853, "step": 2155, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9068702290076336, "success_rate.epoch.env.math": 0.9562706270627063, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7722653915122535, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8599034015535295, "success_rate.epoch.global": 0.8715148698884758, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9879557291666666, "tokens_p.mean_in_band": 0.5868389423076923, "tokens_rate.above_band": 0.7868852459016393, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21311475409836064 }, { "epoch": 0.4601619088197699, "grad_norm": 44.95998243278196, "learning_rate": 3.9816075266780886e-07, "loss": 0.3233, "step": 2160, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9057750759878419, "success_rate.epoch.env.math": 0.9563786008230453, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7728085867620751, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8598630393708357, "success_rate.epoch.global": 0.8715808993973111, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992741935483871, "tokens_p.mean_in_band": 0.69384765625, "tokens_rate.above_band": 0.950920245398773, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049079754601226995 }, { "epoch": 0.4612270984235194, "grad_norm": 96.85026338184036, "learning_rate": 3.981505030884111e-07, "loss": 0.1844, "step": 2165, "success_rate.epoch.env.abd": 0.9845360824742269, "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9059180576631259, "success_rate.epoch.env.math": 0.9564860426929392, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7729439809296782, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8600866274156558, "success_rate.epoch.global": 0.8718778908418131, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984357997823722, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.46229228802726885, "grad_norm": 63.37427154117726, "learning_rate": 3.981402257052008e-07, "loss": 0.4072, "step": 2170, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9059180576631259, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7738872403560831, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602041518321543, "success_rate.epoch.global": 0.8721735117674204, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977034120734908, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9973821989528796, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002617801047120419 }, { "epoch": 0.4633574776310183, "grad_norm": 101.70573887188247, "learning_rate": 3.981299205256681e-07, "loss": 0.3496, "step": 2175, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, "success_rate.epoch.env.agentgym:sciworld": 0.958041958041958, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9059180576631259, "success_rate.epoch.env.math": 0.9566993464052288, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7738306690349319, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602420168432782, "success_rate.epoch.global": 0.8722375690607734, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968065693430657, "tokens_p.mean_in_band": 0.6098090277777778, "tokens_rate.above_band": 0.9383561643835616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06164383561643835 }, { "epoch": 0.46442266723476777, "grad_norm": 149.27580544374308, "learning_rate": 3.981195875573234e-07, "loss": 0.2298, "step": 2180, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9063444108761329, "success_rate.epoch.env.math": 0.9567699836867863, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7736406619385343, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603225363897672, "success_rate.epoch.global": 0.8723013321084061, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979166666666667, "tokens_p.mean_in_band": 0.7076822916666666, "tokens_rate.above_band": 0.9770114942528736, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022988505747126436 }, { "epoch": 0.46548785683851723, "grad_norm": 58.87499101498911, "learning_rate": 3.981092268076971e-07, "loss": 0.2147, "step": 2185, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.8861788617886179, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9066265060240963, "success_rate.epoch.env.math": 0.9568755085435313, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7739079102715466, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604997856039944, "success_rate.epoch.global": 0.8725939505041247, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997032122905028, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9962894248608535, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0037105751391465678 }, { "epoch": 0.46655304644226675, "grad_norm": 190.27091460647455, "learning_rate": 3.9809883828434024e-07, "loss": 0.3868, "step": 2190, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9519230769230769, "success_rate.epoch.env.logic": 0.9052631578947369, "success_rate.epoch.env.math": 0.9569805194805194, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7744405182567726, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605383709553419, "success_rate.epoch.global": 0.8726566072245084, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993342210386151, "tokens_p.mean_in_band": 0.5615234375, "tokens_rate.above_band": 0.9740596627756161, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02594033722438392 }, { "epoch": 0.4676182360460162, "grad_norm": 31.74037483645768, "learning_rate": 3.9808842199482387e-07, "loss": 0.2583, "step": 2195, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9521531100478469, "success_rate.epoch.env.logic": 0.9054054054054054, "success_rate.epoch.env.math": 0.9570502431118314, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7746478873239436, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605974048947893, "success_rate.epoch.global": 0.8727189781021898, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967873831775701, "tokens_p.mean_in_band": 0.6293402777777778, "tokens_rate.above_band": 0.9727272727272728, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02727272727272727 }, { "epoch": 0.4686834256497657, "grad_norm": 73.87236250917555, "learning_rate": 3.980779779467392e-07, "loss": 0.2601, "step": 2200, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95260663507109, "success_rate.epoch.env.logic": 0.9055472263868066, "success_rate.epoch.env.math": 0.9563106796116505, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.775175644028103, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8606393353731224, "success_rate.epoch.global": 0.8727810650887574, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992465932914046, "tokens_p.mean_in_band": 0.6692708333333334, "tokens_rate.above_band": 0.99375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00625 }, { "epoch": 0.46974861525351513, "grad_norm": 200.78911066176883, "learning_rate": 3.980675061476978e-07, "loss": 0.6, "step": 2205, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95260663507109, "success_rate.epoch.env.logic": 0.9059701492537313, "success_rate.epoch.env.math": 0.9564164648910412, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7751168224299065, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8606820523320422, "success_rate.epoch.global": 0.8728428701180745, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946646341463414, "tokens_p.mean_in_band": 0.5703125, "tokens_rate.above_band": 0.9318181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06818181818181818 }, { "epoch": 0.4708138048572646, "grad_norm": 38.099097895050534, "learning_rate": 3.980570066053315e-07, "loss": 0.3271, "step": 2210, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9063893016344725, "success_rate.epoch.env.math": 0.9564867042707494, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.7744755244755245, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8606885657333353, "success_rate.epoch.global": 0.8726778432260988, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992142166344294, "tokens_p.mean_in_band": 0.4803602430555556, "tokens_rate.above_band": 0.9828897338403042, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017110266159695818 }, { "epoch": 0.47187899446101406, "grad_norm": 466.8689306661385, "learning_rate": 3.980464793272923e-07, "loss": 0.3609, "step": 2215, "success_rate.epoch.env.abd": 0.9849246231155779, "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9065281899109793, "success_rate.epoch.env.math": 0.9565916398713826, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.7737056428155905, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603956556314917, "success_rate.epoch.global": 0.8722875226039783, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.6666666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971590909090909, "tokens_p.mean_in_band": 0.5126008064516129, "tokens_rate.above_band": 0.8864468864468864, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11355311355311355 }, { "epoch": 0.4729441840647635, "grad_norm": 76.79584246966465, "learning_rate": 3.980359243212524e-07, "loss": 0.3829, "step": 2220, "success_rate.epoch.env.abd": 0.985, "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9066666666666666, "success_rate.epoch.env.math": 0.9565916398713826, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.774174869716271, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604577538624727, "success_rate.epoch.global": 0.8723500225529995, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9902836134453782, "tokens_p.mean_in_band": 0.745703125, "tokens_rate.above_band": 0.9224806201550387, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07751937984496124 }, { "epoch": 0.474009373668513, "grad_norm": 128.3435347931603, "learning_rate": 3.980253415949041e-07, "loss": 0.3236, "step": 2225, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9066666666666666, "success_rate.epoch.env.math": 0.9551641313050441, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.7738577212261423, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8596809038741906, "success_rate.epoch.global": 0.8717371737173717, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.62, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9984697164948454, "tokens_p.mean_in_band": 0.6244419642857143, "tokens_rate.above_band": 0.9651741293532339, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03482587064676617 }, { "epoch": 0.47507456327226244, "grad_norm": 77.02831348025278, "learning_rate": 3.9801473115596027e-07, "loss": 0.3993, "step": 2230, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9057437407952872, "success_rate.epoch.env.math": 0.9552, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.7743796884016156, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8597342939606843, "success_rate.epoch.global": 0.8718006286484059, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997120786516854, "tokens_p.mean_in_band": 0.5649038461538461, "tokens_rate.above_band": 0.9716157205240175, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028384279475982533 }, { "epoch": 0.47613975287601196, "grad_norm": 8.984483501970702, "learning_rate": 3.980040930121534e-07, "loss": 0.401, "step": 2235, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9044117647058824, "success_rate.epoch.env.math": 0.9553072625698324, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.7733026467203682, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8592845432473823, "success_rate.epoch.global": 0.8709677419354839, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.35, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9969035823170732, "tokens_p.mean_in_band": 0.4986979166666667, "tokens_rate.above_band": 0.9162011173184358, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08379888268156424 }, { "epoch": 0.4772049424797614, "grad_norm": 178.98429439045935, "learning_rate": 3.979934271712367e-07, "loss": 0.1988, "step": 2240, "success_rate.epoch.env.abd": 0.9851485148514851, "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9046920821114369, "success_rate.epoch.env.math": 0.9553784860557769, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.7735632183908046, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8594520676309997, "success_rate.epoch.global": 0.8712561466249441, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0015008576329332, "tokens_p.mean_in_band": 0.8763020833333334, "tokens_rate.above_band": 0.9948805460750854, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005119453924914676 }, { "epoch": 0.4782701320835109, "grad_norm": 383.0964586358051, "learning_rate": 3.9798273364098327e-07, "loss": 0.4197, "step": 2245, "success_rate.epoch.env.abd": 0.9851485148514851, "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9048316251830161, "success_rate.epoch.env.math": 0.9555202541699762, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.7736389684813754, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8594845277470313, "success_rate.epoch.global": 0.8713202497769849, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.45535714285714285, "tokens_rate.above_band": 0.851063829787234, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14893617021276595 }, { "epoch": 0.47933532168726034, "grad_norm": 104.10808818476188, "learning_rate": 3.9797201242918657e-07, "loss": 0.4295, "step": 2250, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9049707602339181, "success_rate.epoch.env.math": 0.9556611243072051, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.773013150371641, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8594597411201004, "success_rate.epoch.global": 0.8711615487316422, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924479166666667, "tokens_p.mean_in_band": 0.5245028409090909, "tokens_rate.above_band": 0.916030534351145, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08396946564885496 }, { "epoch": 0.4804005112910098, "grad_norm": 149.18604883650156, "learning_rate": 3.9796126354366e-07, "loss": 0.2466, "step": 2255, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9493087557603687, "success_rate.epoch.env.logic": 0.9053857350800583, "success_rate.epoch.env.math": 0.9556962025316456, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.7732724157624215, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8592479031569021, "success_rate.epoch.global": 0.8712255772646537, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.6666666666666666, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9926783376963351, "tokens_p.mean_in_band": 0.5896739130434783, "tokens_rate.above_band": 0.8925233644859814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10747663551401869 }, { "epoch": 0.48146570089475926, "grad_norm": 69.58324827557273, "learning_rate": 3.979504869922374e-07, "loss": 0.3747, "step": 2260, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9493087557603687, "success_rate.epoch.env.logic": 0.9042089985486212, "success_rate.epoch.env.math": 0.9558011049723757, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.7736602052451539, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8592111321816632, "success_rate.epoch.global": 0.8712893221089942, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978448275862069, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9592875318066157, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04071246819338423 }, { "epoch": 0.4825308904985087, "grad_norm": 441.5440760869613, "learning_rate": 3.9793968278277267e-07, "loss": 0.463, "step": 2265, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9493087557603687, "success_rate.epoch.env.logic": 0.9046242774566474, "success_rate.epoch.env.math": 0.9558707643814027, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.7733485193621867, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8593329780017819, "success_rate.epoch.global": 0.8713243422507185, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9984722222222222, "tokens_p.mean_in_band": 0.66796875, "tokens_rate.above_band": 0.974025974025974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025974025974025976 }, { "epoch": 0.4835960801022582, "grad_norm": 132.56110870414003, "learning_rate": 3.9792885092313973e-07, "loss": 0.2842, "step": 2270, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9495412844036697, "success_rate.epoch.env.logic": 0.9048991354466859, "success_rate.epoch.env.math": 0.9551886792452831, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.7737350767481523, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859376972824317, "success_rate.epoch.global": 0.871387602029561, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946933962264151, "tokens_p.mean_in_band": 0.5825892857142857, "tokens_rate.above_band": 0.9784615384615385, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021538461538461538 }, { "epoch": 0.48466126970600765, "grad_norm": 90.24017032065915, "learning_rate": 3.979179914212328e-07, "loss": 0.4124, "step": 2275, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9497716894977168, "success_rate.epoch.env.logic": 0.9050359712230216, "success_rate.epoch.env.math": 0.9553641346906813, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.7734241908006815, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8594046322005464, "success_rate.epoch.global": 0.8714505833149901, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967984409799554, "tokens_p.mean_in_band": 0.486328125, "tokens_rate.above_band": 0.9868131868131869, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013186813186813187 }, { "epoch": 0.48572645930975716, "grad_norm": 55.49851071523598, "learning_rate": 3.9790710428496615e-07, "loss": 0.3556, "step": 2280, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9497716894977168, "success_rate.epoch.env.logic": 0.9053084648493543, "success_rate.epoch.env.math": 0.9554339327599687, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7728045325779037, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622968207385299, "success_rate.epoch.global": 0.8712936525367889, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942642405063291, "tokens_p.mean_in_band": 0.4583333333333333, "tokens_rate.above_band": 0.9132947976878613, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08670520231213873 }, { "epoch": 0.4867916489135066, "grad_norm": 99.65009153691177, "learning_rate": 3.9789618952227435e-07, "loss": 0.4012, "step": 2285, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9045584045584045, "success_rate.epoch.env.math": 0.9555382215288611, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7723669309173273, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622190877675888, "success_rate.epoch.global": 0.871137409598948, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.8, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9996411795407099, "tokens_p.mean_in_band": 0.4058159722222222, "tokens_rate.above_band": 0.9637826961770624, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03621730382293763 }, { "epoch": 0.4878568385172561, "grad_norm": 136.6075873900123, "learning_rate": 3.9788524714111197e-07, "loss": 0.3298, "step": 2290, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9504504504504504, "success_rate.epoch.env.logic": 0.9048295454545454, "success_rate.epoch.env.math": 0.955607476635514, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.772316384180791, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862286387741835, "success_rate.epoch.global": 0.8712005248195933, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979262483574245, "tokens_p.mean_in_band": 0.6290564903846154, "tokens_rate.above_band": 0.9915309446254071, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008469055374592834 }, { "epoch": 0.48892202812100555, "grad_norm": 255.3266808259307, "learning_rate": 3.978742771494537e-07, "loss": 0.2824, "step": 2295, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508928571428571, "success_rate.epoch.env.logic": 0.9050991501416431, "success_rate.epoch.env.math": 0.9557453416149069, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7725733634311512, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623870109790402, "success_rate.epoch.global": 0.8714815622954397, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995192307692308, "tokens_p.mean_in_band": 0.72265625, "tokens_rate.above_band": 0.994263862332696, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0057361376673040155 }, { "epoch": 0.489987217724755, "grad_norm": 181.1578075830466, "learning_rate": 3.9786327955529445e-07, "loss": 0.3752, "step": 2300, "success_rate.epoch.env.abd": 0.9854368932038835, "success_rate.epoch.env.agentgym:alfworld": 0.8828125, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9052333804809052, "success_rate.epoch.env.math": 0.9558139534883721, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7725225225225225, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624515338148165, "success_rate.epoch.global": 0.8715436533855868, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0006224370240187, "tokens_p.mean_in_band": 0.6650390625, "tokens_rate.above_band": 0.9976621858562245, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0023378141437755697 }, { "epoch": 0.49105240732850447, "grad_norm": 149.1546908914634, "learning_rate": 3.978522543666491e-07, "loss": 0.4842, "step": 2305, "success_rate.epoch.env.abd": 0.9854368932038835, "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9055007052186178, "success_rate.epoch.env.math": 0.9558481797056545, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7721661054994389, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625291305570411, "success_rate.epoch.global": 0.8713882250706061, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983462591240876, "tokens_p.mean_in_band": 0.4453125, "tokens_rate.above_band": 0.9647887323943662, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035211267605633804 }, { "epoch": 0.49211759693225393, "grad_norm": 141.89747014964618, "learning_rate": 3.978412015915528e-07, "loss": 0.2546, "step": 2310, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9056338028169014, "success_rate.epoch.env.math": 0.955984555984556, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7721164613661814, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625555108222097, "success_rate.epoch.global": 0.8714502492954693, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9920138888888889, "tokens_p.mean_in_band": 0.620703125, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 0.4931827865360034, "grad_norm": 91.78071221949968, "learning_rate": 3.978301212380607e-07, "loss": 0.2814, "step": 2315, "success_rate.epoch.env.abd": 0.9855769230769231, "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.906030855539972, "success_rate.epoch.env.math": 0.9560185185185185, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7715083798882681, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626221423325448, "success_rate.epoch.global": 0.8712956954358642, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973776223776224, "tokens_p.mean_in_band": 0.4418402777777778, "tokens_rate.above_band": 0.9407894736842105, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05921052631578947 }, { "epoch": 0.49424797613975285, "grad_norm": 83.92647100878033, "learning_rate": 3.9781901331424813e-07, "loss": 0.3356, "step": 2320, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9061624649859944, "success_rate.epoch.env.math": 0.9560862865947611, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7720178372352285, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627231512569846, "success_rate.epoch.global": 0.871573494496007, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987547438330171, "tokens_p.mean_in_band": 0.853515625, "tokens_rate.above_band": 0.996219281663516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003780718336483932 }, { "epoch": 0.49531316574350237, "grad_norm": 144.07250038750462, "learning_rate": 3.9780787782821046e-07, "loss": 0.3308, "step": 2325, "success_rate.epoch.env.abd": 0.9858490566037735, "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9062937062937063, "success_rate.epoch.env.math": 0.9561538461538461, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7725250278086763, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628233492076169, "success_rate.epoch.global": 0.8718500969200947, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9963362068965518, "tokens_p.mean_in_band": 0.62109375, "tokens_rate.above_band": 0.9731543624161074, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026845637583892617 }, { "epoch": 0.49637835534725183, "grad_norm": 86.81295869589223, "learning_rate": 3.9779671478806306e-07, "loss": 0.3185, "step": 2330, "success_rate.epoch.env.abd": 0.9858490566037735, "success_rate.epoch.env.agentgym:alfworld": 0.8769230769230769, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9066852367688022, "success_rate.epoch.env.math": 0.9561875480399693, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7726514730405781, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623668232245664, "success_rate.epoch.global": 0.8719105953148506, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985281447443998, "tokens_p.mean_in_band": 0.68212890625, "tokens_rate.above_band": 0.9977077363896848, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002292263610315186 }, { "epoch": 0.4974435449510013, "grad_norm": 60.215911289432135, "learning_rate": 3.977855242019416e-07, "loss": 0.3292, "step": 2335, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8769230769230769, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9068150208623088, "success_rate.epoch.env.math": 0.955487336914812, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7734072022160665, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623897085425493, "success_rate.epoch.global": 0.8719708342268926, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941007653061225, "tokens_p.mean_in_band": 0.662890625, "tokens_rate.above_band": 0.9074074074074074, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09259259259259259 }, { "epoch": 0.49850873455475075, "grad_norm": 151.45533500262073, "learning_rate": 3.9777430607800157e-07, "loss": 0.2186, "step": 2340, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.907202216066482, "success_rate.epoch.env.math": 0.9555555555555556, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7737831858407079, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862556684126167, "success_rate.epoch.global": 0.8722448106141665, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982394366197183, "tokens_p.mean_in_band": 0.8138020833333334, "tokens_rate.above_band": 0.9916201117318436, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008379888268156424 }, { "epoch": 0.4995739241585002, "grad_norm": 182.9995792946016, "learning_rate": 3.9776306042441874e-07, "loss": 0.3115, "step": 2345, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9075862068965517, "success_rate.epoch.env.math": 0.9556235654169855, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7744070601213459, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862654490941816, "success_rate.epoch.global": 0.8725176169122357, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9958584337349398, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9764705882352941, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023529411764705882 }, { "epoch": 0.5006391137622497, "grad_norm": 54.47830334472629, "learning_rate": 3.977517872493889e-07, "loss": 0.3368, "step": 2350, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.90646492434663, "success_rate.epoch.env.math": 0.9557589626239512, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7749036873968079, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626100129356801, "success_rate.epoch.global": 0.8725761772853186, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957264957264957, "tokens_p.mean_in_band": 0.6669921875, "tokens_rate.above_band": 0.936, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.064 }, { "epoch": 0.5017043033659991, "grad_norm": 32.118313219529966, "learning_rate": 3.9774048656112775e-07, "loss": 0.2346, "step": 2355, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9069767441860465, "success_rate.epoch.env.math": 0.9557926829268293, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7748489840746843, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626780888226062, "success_rate.epoch.global": 0.8726344886242824, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942196531791907, "tokens_p.mean_in_band": 0.5569196428571429, "tokens_rate.above_band": 0.9251336898395722, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0748663101604278 }, { "epoch": 0.5027694929697486, "grad_norm": 458.57098464456413, "learning_rate": 3.977291583678712e-07, "loss": 0.333, "step": 2360, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8778625954198473, "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9072305593451568, "success_rate.epoch.env.math": 0.9558599695585996, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7750410509031199, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627247405152715, "success_rate.epoch.global": 0.8726925525143221, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9903273809523809, "tokens_p.mean_in_band": 0.5559895833333334, "tokens_rate.above_band": 0.9333333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06666666666666667 }, { "epoch": 0.5038346825734981, "grad_norm": 56.32929499081233, "learning_rate": 3.977178026778752e-07, "loss": 0.3516, "step": 2365, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9061224489795918, "success_rate.epoch.env.math": 0.9559270516717325, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7752870420995079, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8654994053005809, "success_rate.epoch.global": 0.8727503705272073, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977886405959032, "tokens_p.mean_in_band": 0.716015625, "tokens_rate.above_band": 0.9817184643510055, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018281535648994516 }, { "epoch": 0.5048998721772475, "grad_norm": 93.0944463656511, "learning_rate": 3.977064194994156e-07, "loss": 0.2611, "step": 2370, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9061224489795918, "success_rate.epoch.env.math": 0.9561270801815431, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7755324959038776, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8656453135373464, "success_rate.epoch.global": 0.8730192267061061, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981566011235955, "tokens_p.mean_in_band": 0.869140625, "tokens_rate.above_band": 0.994413407821229, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00558659217877095 }, { "epoch": 0.505965061780997, "grad_norm": 0.0, "learning_rate": 3.976950088407885e-07, "loss": 0.4418, "step": 2375, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9517543859649122, "success_rate.epoch.env.logic": 0.9061224489795918, "success_rate.epoch.env.math": 0.9563581640331076, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7757774140752864, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8657079078798572, "success_rate.epoch.global": 0.8732869491882774, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.000475888324873, "tokens_p.mean_in_band": 0.796875, "tokens_rate.above_band": 0.9987325728770595, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012674271229404308 }, { "epoch": 0.5070302513847464, "grad_norm": 512.0783073177407, "learning_rate": 3.9768357071030974e-07, "loss": 0.2407, "step": 2380, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9521739130434783, "success_rate.epoch.env.logic": 0.9063772048846676, "success_rate.epoch.env.math": 0.9564237415477085, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7762656505171476, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8658195530562303, "success_rate.epoch.global": 0.873553545129392, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999405705229794, "tokens_p.mean_in_band": 0.76171875, "tokens_rate.above_band": 0.9968404423380727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00315955766192733 }, { "epoch": 0.5080954409884959, "grad_norm": 76.92824746945375, "learning_rate": 3.976721051163155e-07, "loss": 0.2361, "step": 2385, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9065040650406504, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7767517653449212, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8659252794067704, "success_rate.epoch.global": 0.8738190216250262, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984697164948454, "tokens_p.mean_in_band": 0.865234375, "tokens_rate.above_band": 0.9948717948717949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005128205128205128 }, { "epoch": 0.5091606305922454, "grad_norm": 121.21388088142787, "learning_rate": 3.976606120671618e-07, "loss": 0.3901, "step": 2390, "success_rate.epoch.env.abd": 0.986046511627907, "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9065040650406504, "success_rate.epoch.env.math": 0.9565868263473054, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7758527341635084, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8658773826992614, "success_rate.epoch.global": 0.8734548501990362, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9967873831775701, "tokens_p.mean_in_band": 0.34765625, "tokens_rate.above_band": 0.8699186991869918, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13008130081300814 }, { "epoch": 0.5102258201959949, "grad_norm": 67.57421037977869, "learning_rate": 3.976490915712247e-07, "loss": 0.2638, "step": 2395, "success_rate.epoch.env.abd": 0.986046511627907, "success_rate.epoch.env.agentgym:alfworld": 0.8796992481203008, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.9065040650406504, "success_rate.epoch.env.math": 0.9567486950037286, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.775796866558617, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8659056786599097, "success_rate.epoch.global": 0.8735103491532511, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000289054470709, "tokens_p.mean_in_band": 0.4290364583333333, "tokens_rate.above_band": 0.9878172588832488, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012182741116751269 }, { "epoch": 0.5112910097997444, "grad_norm": 56.527684669061045, "learning_rate": 3.976375436369001e-07, "loss": 0.2849, "step": 2400, "success_rate.epoch.env.abd": 0.9861751152073732, "success_rate.epoch.env.agentgym:alfworld": 0.8814814814814815, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.9065040650406504, "success_rate.epoch.env.math": 0.9569093610698366, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.775377969762419, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8660559155881422, "success_rate.epoch.global": 0.8735656165240976, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992559523809523, "tokens_p.mean_in_band": 0.6734375, "tokens_rate.above_band": 0.9820143884892086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017985611510791366 }, { "epoch": 0.5123561994034939, "grad_norm": 66.35024469312371, "learning_rate": 3.9762596827260425e-07, "loss": 0.3837, "step": 2405, "success_rate.epoch.env.abd": 0.9861751152073732, "success_rate.epoch.env.agentgym:alfworld": 0.8814814814814815, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.9066305818673883, "success_rate.epoch.env.math": 0.9570051890289103, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7759827679052235, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.866152822625989, "success_rate.epoch.global": 0.8738288569643973, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979416167664671, "tokens_p.mean_in_band": 0.8177083333333334, "tokens_rate.above_band": 0.9653179190751445, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03468208092485549 }, { "epoch": 0.5134213890072433, "grad_norm": 47.59831070079333, "learning_rate": 3.976143654867731e-07, "loss": 0.3966, "step": 2410, "success_rate.epoch.env.abd": 0.9862385321100917, "success_rate.epoch.env.agentgym:alfworld": 0.8814814814814815, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9527896995708155, "success_rate.epoch.env.logic": 0.9067567567567567, "success_rate.epoch.env.math": 0.9571005917159763, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7759269210102095, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661921535585712, "success_rate.epoch.global": 0.8738832329108664, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959439528023599, "tokens_p.mean_in_band": 0.596875, "tokens_rate.above_band": 0.9854651162790697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014534883720930232 }, { "epoch": 0.5144865786109928, "grad_norm": 130.50296199173724, "learning_rate": 3.976027352878627e-07, "loss": 0.3629, "step": 2415, "success_rate.epoch.env.abd": 0.9862385321100917, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9529914529914529, "success_rate.epoch.env.logic": 0.9068825910931174, "success_rate.epoch.env.math": 0.9571639586410635, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7765273311897106, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8663615012455327, "success_rate.epoch.global": 0.874144723201327, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980936004784688, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9964243146603099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003575685339690107 }, { "epoch": 0.5155517682147422, "grad_norm": 96.31074249888358, "learning_rate": 3.97591077684349e-07, "loss": 0.5346, "step": 2420, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9068825910931174, "success_rate.epoch.env.math": 0.9571955719557196, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7771245323356494, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8660669256765762, "success_rate.epoch.global": 0.8741982205669356, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956168831168831, "tokens_p.mean_in_band": 0.6621621621621622, "tokens_rate.above_band": 0.9397884458909682, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.060211554109031735 }, { "epoch": 0.5166169578184917, "grad_norm": 72.3921305602384, "learning_rate": 3.9757939268472805e-07, "loss": 0.4327, "step": 2425, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9070080862533693, "success_rate.epoch.env.math": 0.9572901325478645, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7767714437932871, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8660548317865794, "success_rate.epoch.global": 0.874045013421433, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9925, "tokens_p.mean_in_band": 0.5870535714285714, "tokens_rate.above_band": 0.9146341463414634, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08536585365853659 }, { "epoch": 0.5176821474222412, "grad_norm": 72.95999639463913, "learning_rate": 3.9756768029751575e-07, "loss": 0.3234, "step": 2430, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9071332436069987, "success_rate.epoch.env.math": 0.9573842762674504, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7768331562167906, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661015529457955, "success_rate.epoch.global": 0.8740984957758088, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936079545454546, "tokens_p.mean_in_band": 0.6223958333333334, "tokens_rate.above_band": 0.9513513513513514, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04864864864864865 }, { "epoch": 0.5187473370259906, "grad_norm": 98.629512441129, "learning_rate": 3.97555940531248e-07, "loss": 0.5392, "step": 2435, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.907258064516129, "success_rate.epoch.env.math": 0.9567765567765568, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.776657824933687, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661273283813963, "success_rate.epoch.global": 0.873946123791898, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8833333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984567901234568, "tokens_p.mean_in_band": 0.3914930555555556, "tokens_rate.above_band": 0.9574468085106383, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0425531914893617 }, { "epoch": 0.5198125266297401, "grad_norm": 52.772482048741715, "learning_rate": 3.975441733944807e-07, "loss": 0.2619, "step": 2440, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.8840579710144928, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9075067024128687, "success_rate.epoch.env.math": 0.9567765567765568, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7772486772486773, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.866305939331072, "success_rate.epoch.global": 0.874204801970039, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996675531914894, "tokens_p.mean_in_band": 0.869140625, "tokens_rate.above_band": 0.9983660130718954, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016339869281045752 }, { "epoch": 0.5208777162334896, "grad_norm": 241.05003876711066, "learning_rate": 3.9753237889578963e-07, "loss": 0.2334, "step": 2445, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.8848920863309353, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9075067024128687, "success_rate.epoch.env.math": 0.9569657184536834, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7776016904384575, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8664310566204674, "success_rate.epoch.global": 0.8744624206430474, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997890625, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.9950248756218906, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004975124378109453 }, { "epoch": 0.521942905837239, "grad_norm": 97.15092585275974, "learning_rate": 3.9752055704377057e-07, "loss": 0.3471, "step": 2450, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9078771695594126, "success_rate.epoch.env.math": 0.9570284049526584, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7775434897206115, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8665398886487423, "success_rate.epoch.global": 0.8745146127120376, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988957597173145, "tokens_p.mean_in_band": 0.5485026041666666, "tokens_rate.above_band": 0.9792387543252595, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020761245674740483 }, { "epoch": 0.5230080954409885, "grad_norm": 53.052851937983725, "learning_rate": 3.9750870784703913e-07, "loss": 0.4486, "step": 2455, "success_rate.epoch.env.abd": 0.9819819819819819, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9537815126050421, "success_rate.epoch.env.logic": 0.9081225033288948, "success_rate.epoch.env.math": 0.9571220930232558, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7778947368421053, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8666277807550707, "success_rate.epoch.global": 0.8747705486436875, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998481308411215, "tokens_p.mean_in_band": 0.62890625, "tokens_rate.above_band": 0.9962756052141527, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0037243947858473 }, { "epoch": 0.5240732850447379, "grad_norm": 87.1427053362088, "learning_rate": 3.9749683131423096e-07, "loss": 0.2645, "step": 2460, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9537815126050421, "success_rate.epoch.env.logic": 0.9081225033288948, "success_rate.epoch.env.math": 0.9572463768115942, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7783613445378151, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667625286416047, "success_rate.epoch.global": 0.8750254427030327, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960462287104623, "tokens_p.mean_in_band": 0.7534722222222222, "tokens_rate.above_band": 0.9785714285714285, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02142857142857143 }, { "epoch": 0.5251384746484874, "grad_norm": 192.9471286753687, "learning_rate": 3.974849274540016e-07, "loss": 0.3167, "step": 2465, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9082446808510638, "success_rate.epoch.env.math": 0.9566787003610109, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7785939139559287, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667816647316458, "success_rate.epoch.global": 0.8750761730652041, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977370689655173, "tokens_p.mean_in_band": 0.5121527777777778, "tokens_rate.above_band": 0.9698996655518395, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030100334448160536 }, { "epoch": 0.5262036642522369, "grad_norm": 137.28817544581148, "learning_rate": 3.974729962750264e-07, "loss": 0.3777, "step": 2470, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9082446808510638, "success_rate.epoch.env.math": 0.9567723342939481, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7782426778242678, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667789028123181, "success_rate.epoch.global": 0.8749239813500912, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939144736842105, "tokens_p.mean_in_band": 0.6219308035714286, "tokens_rate.above_band": 0.9313725490196079, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06862745098039216 }, { "epoch": 0.5272688538559863, "grad_norm": 72.98504092505685, "learning_rate": 3.974610377860009e-07, "loss": 0.3346, "step": 2475, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9084880636604774, "success_rate.epoch.env.math": 0.9568965517241379, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7785900783289817, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.866851182595783, "success_rate.epoch.global": 0.8751770180052599, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980053191489362, "tokens_p.mean_in_band": 0.857421875, "tokens_rate.above_band": 0.9791666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020833333333333332 }, { "epoch": 0.5283340434597358, "grad_norm": 87.94830620386523, "learning_rate": 3.9744905199564027e-07, "loss": 0.501, "step": 2480, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9088507265521797, "success_rate.epoch.env.math": 0.956989247311828, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7790515893694633, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669345343703171, "success_rate.epoch.global": 0.8754290329093478, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9932553956834532, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9586206896551724, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041379310344827586 }, { "epoch": 0.5293992330634854, "grad_norm": 26.892422576680357, "learning_rate": 3.974370389126796e-07, "loss": 0.2039, "step": 2485, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9570815450643777, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7796257796257796, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867016958965553, "success_rate.epoch.global": 0.8756800322385654, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965277777777778, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.984375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015625 }, { "epoch": 0.5304644226672348, "grad_norm": 226.98644075811197, "learning_rate": 3.974249985458741e-07, "loss": 0.3152, "step": 2490, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9092105263157895, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7793354101765316, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867043596502452, "success_rate.epoch.global": 0.8755278503921174, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.86, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992236024844721, "tokens_p.mean_in_band": 0.5262784090909091, "tokens_rate.above_band": 0.9777327935222672, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022267206477732792 }, { "epoch": 0.5315296122709843, "grad_norm": 99.26347115567036, "learning_rate": 3.974129309039985e-07, "loss": 0.3905, "step": 2495, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9541666666666667, "success_rate.epoch.env.logic": 0.9093298291721419, "success_rate.epoch.env.math": 0.9566453447050463, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7795643153526971, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671755537139266, "success_rate.epoch.global": 0.8757776439895645, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997981266149871, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9987096774193548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012903225806451613 }, { "epoch": 0.5325948018747337, "grad_norm": 146.99056460450822, "learning_rate": 3.974008359958477e-07, "loss": 0.4452, "step": 2500, "success_rate.epoch.env.abd": 0.9823008849557522, "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9081364829396326, "success_rate.epoch.env.math": 0.9566761363636364, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7793889176592439, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667147273114804, "success_rate.epoch.global": 0.8754255958341678, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9984142945544554, "tokens_p.mean_in_band": 0.5317042151162791, "tokens_rate.above_band": 0.9494712103407755, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05052878965922444 }, { "epoch": 0.5336599914784832, "grad_norm": 324.8467036600824, "learning_rate": 3.973887138302364e-07, "loss": 0.446, "step": 2505, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9081364829396326, "success_rate.epoch.env.math": 0.9567375886524823, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7801857585139319, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667998421126925, "success_rate.epoch.global": 0.8756745952428543, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.994400289017341, "tokens_p.mean_in_band": 0.68310546875, "tokens_rate.above_band": 0.9774011299435028, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022598870056497175 }, { "epoch": 0.5347251810822327, "grad_norm": 140.48786848758652, "learning_rate": 3.9737656441599927e-07, "loss": 0.3785, "step": 2510, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.908256880733945, "success_rate.epoch.env.math": 0.9568904593639576, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7805255023183926, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8668754848727499, "success_rate.epoch.global": 0.8759226012367843, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970034246575342, "tokens_p.mean_in_band": 0.802734375, "tokens_rate.above_band": 0.9864864864864865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013513513513513514 }, { "epoch": 0.5357903706859821, "grad_norm": 288.53355924210047, "learning_rate": 3.9736438776199045e-07, "loss": 0.3281, "step": 2515, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9070680628272252, "success_rate.epoch.env.math": 0.9569209039548022, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7810894141829393, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669117552296908, "success_rate.epoch.global": 0.8759705355365319, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972546095444685, "tokens_p.mean_in_band": 0.5251116071428571, "tokens_rate.above_band": 0.9634273772204807, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03657262277951933 }, { "epoch": 0.5368555602897316, "grad_norm": 37.51261923523871, "learning_rate": 3.9735218387708443e-07, "loss": 0.1555, "step": 2520, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9071895424836601, "success_rate.epoch.env.math": 0.9569513055751588, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7817622950819673, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669867336093108, "success_rate.epoch.global": 0.8762169680111266, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973695286195287, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9983193277310924, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016806722689075631 }, { "epoch": 0.537920749893481, "grad_norm": 94.49331500117329, "learning_rate": 3.9733995277017516e-07, "loss": 0.2636, "step": 2525, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9075520833333334, "success_rate.epoch.env.math": 0.9570119802677942, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7816973415132924, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8670364490680434, "success_rate.epoch.global": 0.8762641284949435, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0005857544517338, "tokens_p.mean_in_band": 0.71630859375, "tokens_rate.above_band": 0.9925581395348837, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0074418604651162795 }, { "epoch": 0.5389859394972305, "grad_norm": 731.3612810956826, "learning_rate": 3.9732769445017665e-07, "loss": 0.4249, "step": 2530, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9076723016905072, "success_rate.epoch.env.math": 0.9571026722925458, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7817440081591025, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8670598651614739, "success_rate.epoch.global": 0.8763111023154562, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957264957264957, "tokens_p.mean_in_band": 0.40234375, "tokens_rate.above_band": 0.9915254237288136, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00847457627118644 }, { "epoch": 0.54005112910098, "grad_norm": 64.85605221932524, "learning_rate": 3.973154089260227e-07, "loss": 0.3177, "step": 2535, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8896551724137931, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9547325102880658, "success_rate.epoch.env.logic": 0.9064935064935065, "success_rate.epoch.env.math": 0.9571629213483146, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7817904374364191, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8670490671981649, "success_rate.epoch.global": 0.8761603792218052, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978742732558139, "tokens_p.mean_in_band": 0.4457236842105263, "tokens_rate.above_band": 0.9783845278725825, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02161547212741752 }, { "epoch": 0.5411163187047294, "grad_norm": 337.10997695762836, "learning_rate": 3.973030962066668e-07, "loss": 0.4547, "step": 2540, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9549180327868853, "success_rate.epoch.env.logic": 0.9066147859922179, "success_rate.epoch.env.math": 0.9572230014025246, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7818366311516997, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671553274033773, "success_rate.epoch.global": 0.876207372363493, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983595800524935, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9870466321243523, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012953367875647668 }, { "epoch": 0.5421815083084789, "grad_norm": 126.4327754675605, "learning_rate": 3.972907563010826e-07, "loss": 0.3846, "step": 2545, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9549180327868853, "success_rate.epoch.env.logic": 0.906856403622251, "success_rate.epoch.env.math": 0.9573128061581525, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7817721518987342, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671990365971033, "success_rate.epoch.global": 0.8762541806020067, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959084440227703, "tokens_p.mean_in_band": 0.7135416666666666, "tokens_rate.above_band": 0.9777365491651205, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022263450834879406 }, { "epoch": 0.5432466979122283, "grad_norm": 135.44532687206348, "learning_rate": 3.972783892182631e-07, "loss": 0.2862, "step": 2550, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9551020408163265, "success_rate.epoch.env.logic": 0.9070967741935484, "success_rate.epoch.env.math": 0.9574022346368715, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7822132390096008, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86728584515986, "success_rate.epoch.global": 0.8764971529550363, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984657622739018, "tokens_p.mean_in_band": 0.6146918402777778, "tokens_rate.above_band": 0.9772727272727273, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022727272727272728 }, { "epoch": 0.5443118875159778, "grad_norm": 159.68119557055104, "learning_rate": 3.972659949672214e-07, "loss": 0.4017, "step": 2555, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9551020408163265, "success_rate.epoch.env.logic": 0.9073359073359073, "success_rate.epoch.env.math": 0.9575208913649025, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7821482602118003, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673124643482772, "success_rate.epoch.global": 0.8765432098765432, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945652173913043, "tokens_p.mean_in_band": 0.50703125, "tokens_rate.above_band": 0.9019607843137255, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09803921568627451 }, { "epoch": 0.5453770771197273, "grad_norm": 92.1967519557992, "learning_rate": 3.972535735569904e-07, "loss": 0.3303, "step": 2560, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.891156462585034, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9551020408163265, "success_rate.epoch.env.logic": 0.9073359073359073, "success_rate.epoch.env.math": 0.9576682859125607, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.782083543029693, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673877535343205, "success_rate.epoch.global": 0.8765890866418932, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979281767955801, "tokens_p.mean_in_band": 0.7309027777777778, "tokens_rate.above_band": 0.9757412398921833, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02425876010781671 }, { "epoch": 0.5464422667234767, "grad_norm": 222.11892995636822, "learning_rate": 3.972411249966227e-07, "loss": 0.4238, "step": 2565, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.891156462585034, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.951417004048583, "success_rate.epoch.env.logic": 0.9062901155327343, "success_rate.epoch.env.math": 0.9570637119113573, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7821931589537223, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669266726086896, "success_rate.epoch.global": 0.8762443880538747, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9857220767888307, "tokens_p.mean_in_band": 0.4992922957371226, "tokens_rate.above_band": 0.7178651966925582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.28213480330744173 }, { "epoch": 0.5475074563272262, "grad_norm": 91.53012504637596, "learning_rate": 3.9722864929519076e-07, "loss": 0.4132, "step": 2570, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.891156462585034, "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9062901155327343, "success_rate.epoch.env.math": 0.9571230982019364, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.782237832413447, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669908191315795, "success_rate.epoch.global": 0.8762906682252094, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.990175585284281, "tokens_p.mean_in_band": 0.8039434523809523, "tokens_rate.above_band": 0.934375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.065625 }, { "epoch": 0.5485726459309758, "grad_norm": 201.6586376209722, "learning_rate": 3.972161464617867e-07, "loss": 0.2345, "step": 2575, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.8940397350993378, "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9064102564102564, "success_rate.epoch.env.math": 0.9571823204419889, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.782565130260521, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8672989949023934, "success_rate.epoch.global": 0.8765312074664593, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976480836236934, "tokens_p.mean_in_band": 0.76171875, "tokens_rate.above_band": 0.997913769123783, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002086230876216968 }, { "epoch": 0.5496378355347252, "grad_norm": 71.10610993549085, "learning_rate": 3.972036165055225e-07, "loss": 0.5212, "step": 2580, "success_rate.epoch.env.abd": 0.9826086956521739, "success_rate.epoch.env.agentgym:alfworld": 0.8940397350993378, "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9053708439897699, "success_rate.epoch.env.math": 0.9572118702553485, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7827172827172827, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867227925303312, "success_rate.epoch.global": 0.8763826896953231, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978725416036308, "tokens_p.mean_in_band": 0.44344429347826086, "tokens_rate.above_band": 0.966374269005848, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033625730994152045 }, { "epoch": 0.5507030251384747, "grad_norm": 152.31945652356728, "learning_rate": 3.9719105943553e-07, "loss": 0.3216, "step": 2585, "success_rate.epoch.env.abd": 0.9826086956521739, "success_rate.epoch.env.agentgym:alfworld": 0.8954248366013072, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9054916985951469, "success_rate.epoch.env.math": 0.9572708476912474, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7831505483549352, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8674285653136503, "success_rate.epoch.global": 0.8766221189231067, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991203703703704, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.995575221238938, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004424778761061947 }, { "epoch": 0.5517682147422242, "grad_norm": 51.022859740643355, "learning_rate": 3.971784752609607e-07, "loss": 0.2812, "step": 2590, "success_rate.epoch.env.abd": 0.9826086956521739, "success_rate.epoch.env.agentgym:alfworld": 0.8954248366013072, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9057324840764331, "success_rate.epoch.env.math": 0.9573883161512028, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7835820895522388, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675003648716999, "success_rate.epoch.global": 0.8768606224627875, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9913366336633663, "tokens_p.mean_in_band": 0.826171875, "tokens_rate.above_band": 0.9805825242718447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019417475728155338 }, { "epoch": 0.5528334043459736, "grad_norm": 293.4973952286877, "learning_rate": 3.971658639909857e-07, "loss": 0.481, "step": 2595, "success_rate.epoch.env.abd": 0.9826839826839827, "success_rate.epoch.env.agentgym:alfworld": 0.8954248366013072, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.905852417302799, "success_rate.epoch.env.math": 0.9574759945130316, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7835153922542205, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675387830235906, "success_rate.epoch.global": 0.8769052672197569, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978849407783418, "tokens_p.mean_in_band": 0.5120738636363636, "tokens_rate.above_band": 0.9817275747508306, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018272425249169437 }, { "epoch": 0.5538985939497231, "grad_norm": 71.21974049576679, "learning_rate": 3.9715322563479617e-07, "loss": 0.383, "step": 2600, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8954248366013072, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9059720457433291, "success_rate.epoch.env.math": 0.9576213260423787, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7833415964303421, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675538559464697, "success_rate.epoch.global": 0.8769497400346621, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9927884615384616, "tokens_p.mean_in_band": 0.5384114583333334, "tokens_rate.above_band": 0.9381443298969072, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061855670103092786 }, { "epoch": 0.5549637835534725, "grad_norm": 81.04741703187635, "learning_rate": 3.971405602016028e-07, "loss": 0.3355, "step": 2605, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9064475347661188, "success_rate.epoch.env.math": 0.9577368779822768, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7834489593657086, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676790798919828, "success_rate.epoch.global": 0.8771862387084375, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992038216560509, "tokens_p.mean_in_band": 0.755859375, "tokens_rate.above_band": 0.9936708860759493, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006329113924050633 }, { "epoch": 0.556028973157222, "grad_norm": 100.81102008230842, "learning_rate": 3.97127867700636e-07, "loss": 0.3533, "step": 2610, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.906801007556675, "success_rate.epoch.env.math": 0.9577656675749319, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7821146245059288, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675925278486584, "success_rate.epoch.global": 0.8766545175522732, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9960730088495575, "tokens_p.mean_below_band": 1.6079866327345371e-09, "tokens_p.mean_in_band": 0.4947350543478261, "tokens_rate.above_band": 0.9592529711375212, "tokens_rate.below_band": 0.001697792869269949, "tokens_rate.in_band": 0.03904923599320883 }, { "epoch": 0.5570941627609715, "grad_norm": 191.96566746958672, "learning_rate": 3.97115148141146e-07, "loss": 0.3816, "step": 2615, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.952191235059761, "success_rate.epoch.env.logic": 0.9069182389937107, "success_rate.epoch.env.math": 0.9571719918422842, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.782329713721619, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676786550442059, "success_rate.epoch.global": 0.8766992150105304, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988127074880118, "tokens_p.mean_in_band": 0.7046875, "tokens_rate.above_band": 0.998159057437408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001840942562592047 }, { "epoch": 0.5581593523647209, "grad_norm": 803.1966455888191, "learning_rate": 3.9710240153240283e-07, "loss": 0.2919, "step": 2620, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9642857142857143, "success_rate.epoch.env.ded": 0.952191235059761, "success_rate.epoch.env.logic": 0.9071518193224593, "success_rate.epoch.env.math": 0.9572591587516961, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7827586206896552, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.864500052179834, "success_rate.epoch.global": 0.8767437416395949, "success_rate.window.env.babyai": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995702005730659, "tokens_p.mean_in_band": 0.5260416666666666, "tokens_rate.above_band": 0.9914772727272727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008522727272727272 }, { "epoch": 0.5592245419684704, "grad_norm": 277.50867450949494, "learning_rate": 3.97089627883696e-07, "loss": 0.6429, "step": 2625, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8961038961038961, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9525691699604744, "success_rate.epoch.env.logic": 0.9072681704260651, "success_rate.epoch.env.math": 0.9573170731707317, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7831858407079646, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8647010474103123, "success_rate.epoch.global": 0.8769788289147434, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998143115942029, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.999275887038378, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.000724112961622013 }, { "epoch": 0.5602897315722198, "grad_norm": 126.7794952636177, "learning_rate": 3.9707682720433493e-07, "loss": 0.2049, "step": 2630, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.896774193548387, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9525691699604744, "success_rate.epoch.env.logic": 0.9075, "success_rate.epoch.env.math": 0.9574324324324325, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.783014236622484, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8647779457916438, "success_rate.epoch.global": 0.8770226537216829, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976293103448276, "tokens_p.mean_in_band": 0.7550223214285714, "tokens_rate.above_band": 0.9764309764309764, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02356902356902357 }, { "epoch": 0.5613549211759693, "grad_norm": 103.30703815180942, "learning_rate": 3.9706399950364863e-07, "loss": 0.2585, "step": 2635, "success_rate.epoch.env.abd": 0.9829787234042553, "success_rate.epoch.env.agentgym:alfworld": 0.896774193548387, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.9075, "success_rate.epoch.env.math": 0.9575471698113207, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7833333333333333, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8648476435206963, "success_rate.epoch.global": 0.8772563176895307, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997299789251844, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9989473684210526, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0010526315789473684 }, { "epoch": 0.5624201107797188, "grad_norm": 154.9652718105165, "learning_rate": 3.9705114479098583e-07, "loss": 0.2586, "step": 2640, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.896774193548387, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9076154806491885, "success_rate.epoch.env.math": 0.9576043068640646, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7833740831295843, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8648904401125447, "success_rate.epoch.global": 0.8772994500284468, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975765306122449, "tokens_p.mean_in_band": 0.435546875, "tokens_rate.above_band": 0.9919028340080972, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008097165991902834 }, { "epoch": 0.5634853003834682, "grad_norm": 126.45246194760615, "learning_rate": 3.9703826307571496e-07, "loss": 0.4028, "step": 2645, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8974358974358975, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9077306733167082, "success_rate.epoch.env.math": 0.9576612903225806, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7834146341463415, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649866450725189, "success_rate.epoch.global": 0.8773424190800682, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999349925705795, "tokens_p.mean_in_band": 0.466796875, "tokens_rate.above_band": 0.9911634756995582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008836524300441826 }, { "epoch": 0.5645504899872177, "grad_norm": 95.04606237284413, "learning_rate": 3.9702535436722413e-07, "loss": 0.3611, "step": 2650, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8974358974358975, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.953307392996109, "success_rate.epoch.env.logic": 0.9069478908188585, "success_rate.epoch.env.math": 0.9577464788732394, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7831384015594542, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649146965690672, "success_rate.epoch.global": 0.8771962969960325, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976554470709147, "tokens_p.mean_in_band": 0.6715198863636364, "tokens_rate.above_band": 0.9778894472361809, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022110552763819097 }, { "epoch": 0.5656156795909671, "grad_norm": 201.03551712049062, "learning_rate": 3.970124186749211e-07, "loss": 0.4737, "step": 2655, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8974358974358975, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9070631970260223, "success_rate.epoch.env.math": 0.9578595317725752, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7824817518248175, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649107590087755, "success_rate.epoch.global": 0.8770507260041486, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666668, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981218030690537, "tokens_p.mean_in_band": 0.4289772727272727, "tokens_rate.above_band": 0.9861286254728878, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013871374527112233 }, { "epoch": 0.5666808691947166, "grad_norm": 159.40912316367812, "learning_rate": 3.969994560082333e-07, "loss": 0.3871, "step": 2660, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8974358974358975, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9071782178217822, "success_rate.epoch.env.math": 0.9579719813208806, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7825242718446602, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649353035873125, "success_rate.epoch.global": 0.8770939205721815, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956140350877193, "tokens_p.mean_in_band": 0.599609375, "tokens_rate.above_band": 0.9661016949152542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03389830508474576 }, { "epoch": 0.5677460587984662, "grad_norm": 173.34160478951966, "learning_rate": 3.969864663766079e-07, "loss": 0.3907, "step": 2665, "success_rate.epoch.env.abd": 0.9831932773109243, "success_rate.epoch.env.agentgym:alfworld": 0.8980891719745223, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9074074074074074, "success_rate.epoch.env.math": 0.958, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7829457364341085, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650693378824137, "success_rate.epoch.global": 0.8773248168326132, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99658203125, "tokens_p.mean_in_band": 0.64306640625, "tokens_rate.above_band": 0.975609756097561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024390243902439025 }, { "epoch": 0.5688112484022156, "grad_norm": 67.47328691524926, "learning_rate": 3.969734497895116e-07, "loss": 0.4688, "step": 2670, "success_rate.epoch.env.abd": 0.9831932773109243, "success_rate.epoch.env.agentgym:alfworld": 0.8980891719745223, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9075215782983971, "success_rate.epoch.env.math": 0.9574468085106383, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7833655705996132, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.865083919258965, "success_rate.epoch.global": 0.8773673354584661, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986795774647887, "tokens_p.mean_in_band": 0.6822916666666666, "tokens_rate.above_band": 0.9895470383275261, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010452961672473868 }, { "epoch": 0.5698764380059651, "grad_norm": 129.90907328395582, "learning_rate": 3.969604062564308e-07, "loss": 0.4853, "step": 2675, "success_rate.epoch.env.abd": 0.9832635983263598, "success_rate.epoch.env.agentgym:alfworld": 0.8980891719745223, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9078624078624079, "success_rate.epoch.env.math": 0.9574750830564784, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.783405692233478, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651275144188879, "success_rate.epoch.global": 0.8774096949279431, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968619246861925, "tokens_p.mean_in_band": 0.445703125, "tokens_rate.above_band": 0.9598393574297188, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040160642570281124 }, { "epoch": 0.5709416276097146, "grad_norm": 78.91704076627231, "learning_rate": 3.9694733578687146e-07, "loss": 0.2861, "step": 2680, "success_rate.epoch.env.abd": 0.9832635983263598, "success_rate.epoch.env.agentgym:alfworld": 0.8980891719745223, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9079754601226994, "success_rate.epoch.env.math": 0.9575596816976127, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7834456207892204, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651674434609375, "success_rate.epoch.global": 0.8774518961330096, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979383680555556, "tokens_p.mean_in_band": 0.6484375, "tokens_rate.above_band": 0.9829351535836177, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017064846416382253 }, { "epoch": 0.572006817213464, "grad_norm": 146.30382255916345, "learning_rate": 3.9693423839035933e-07, "loss": 0.2785, "step": 2685, "success_rate.epoch.env.abd": 0.983402489626556, "success_rate.epoch.env.agentgym:alfworld": 0.8987341772151899, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9080882352941176, "success_rate.epoch.env.math": 0.9576158940397351, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.7833813640730067, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652482277644094, "success_rate.epoch.global": 0.8774939399589782, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962298927613941, "tokens_p.mean_in_band": 0.6106770833333334, "tokens_rate.above_band": 0.9920212765957447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007978723404255319 }, { "epoch": 0.5730720068172135, "grad_norm": 110.9350808935789, "learning_rate": 3.969211140764397e-07, "loss": 0.293, "step": 2690, "success_rate.epoch.env.abd": 0.9834710743801653, "success_rate.epoch.env.agentgym:alfworld": 0.8987341772151899, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9083129584352079, "success_rate.epoch.env.math": 0.9576158940397351, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7835249042145593, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649213723072946, "success_rate.epoch.global": 0.8773497115205658, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945833333333334, "tokens_p.mean_in_band": 0.651110197368421, "tokens_rate.above_band": 0.8875739644970414, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11242603550295859 }, { "epoch": 0.574137196420963, "grad_norm": 144.55120302286807, "learning_rate": 3.969079628546774e-07, "loss": 0.1735, "step": 2695, "success_rate.epoch.env.abd": 0.9835390946502057, "success_rate.epoch.env.agentgym:alfworld": 0.89937106918239, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9083129584352079, "success_rate.epoch.env.math": 0.9576719576719577, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7840420449116101, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650556849319014, "success_rate.epoch.global": 0.8775775589819803, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985219594594594, "tokens_p.mean_in_band": 0.806640625, "tokens_rate.above_band": 0.9736842105263158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02631578947368421 }, { "epoch": 0.5752023860247124, "grad_norm": 96.04012700928776, "learning_rate": 3.96894784734657e-07, "loss": 0.291, "step": 2700, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.89937106918239, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9538461538461539, "success_rate.epoch.env.logic": 0.9083129584352079, "success_rate.epoch.env.math": 0.9577557755775578, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7840800762631077, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650890951284599, "success_rate.epoch.global": 0.8776191359169293, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997751798561151, "tokens_p.mean_in_band": 0.4580078125, "tokens_rate.above_band": 0.9788732394366197, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02112676056338028 }, { "epoch": 0.5762675756284619, "grad_norm": 87.59971385038605, "learning_rate": 3.9688157972598273e-07, "loss": 0.3949, "step": 2705, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.89375, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9073170731707317, "success_rate.epoch.env.math": 0.957811470006592, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7842857142857143, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8645395035544319, "success_rate.epoch.global": 0.8774754765870812, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0004807692307693, "tokens_p.mean_in_band": 0.515625, "tokens_rate.above_band": 0.9903769045709703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009623095429029671 }, { "epoch": 0.5773327652322113, "grad_norm": 72.34888465643535, "learning_rate": 3.9686834783827814e-07, "loss": 0.3128, "step": 2710, "success_rate.epoch.env.abd": 0.9838056680161943, "success_rate.epoch.env.agentgym:alfworld": 0.89375, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9074299634591961, "success_rate.epoch.env.math": 0.957922419460881, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7842205323193916, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.864559911590273, "success_rate.epoch.global": 0.8775170884906706, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9907407407407407, "tokens_p.mean_in_band": 0.4375, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 0.5783979548359608, "grad_norm": 270.9499998128655, "learning_rate": 3.9685508908118657e-07, "loss": 0.359, "step": 2715, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9075425790754258, "success_rate.epoch.env.math": 0.9579500657030223, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7843601895734598, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8646512894744892, "success_rate.epoch.global": 0.8775585469297437, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981094306049823, "tokens_p.mean_in_band": 0.6025390625, "tokens_rate.above_band": 0.9859649122807017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014035087719298246 }, { "epoch": 0.5794631444397103, "grad_norm": 27.085048256464663, "learning_rate": 3.9684180346437086e-07, "loss": 0.1702, "step": 2720, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9076549210206561, "success_rate.epoch.env.math": 0.9580877537655533, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7846663511594889, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8648063455545937, "success_rate.epoch.global": 0.8777839131235045, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_p.mean_in_band": 0.72578125, "tokens_rate.above_band": 0.9760765550239234, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023923444976076555 }, { "epoch": 0.5805283340434597, "grad_norm": 94.42672213569979, "learning_rate": 3.9682849099751366e-07, "loss": 0.3339, "step": 2725, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9659090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9543726235741445, "success_rate.epoch.env.logic": 0.9078787878787878, "success_rate.epoch.env.math": 0.9581425768476128, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7848699763593381, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649176000762498, "success_rate.epoch.global": 0.8780084512217528, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998881153654898, "tokens_p.mean_in_band": 0.6455078125, "tokens_rate.above_band": 0.9990064580228515, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0009935419771485345 }, { "epoch": 0.5815935236472092, "grad_norm": 101.82936492693237, "learning_rate": 3.968151516903168e-07, "loss": 0.5311, "step": 2730, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9543726235741445, "success_rate.epoch.env.logic": 0.9078787878787878, "success_rate.epoch.env.math": 0.95822454308094, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.785007072135785, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649550242595169, "success_rate.epoch.global": 0.8780487804878049, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994926948051948, "tokens_p.mean_in_band": 0.38046875, "tokens_rate.above_band": 0.9390243902439024, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06097560975609756 }, { "epoch": 0.5826587132509586, "grad_norm": 196.2003889816082, "learning_rate": 3.96801785552502e-07, "loss": 0.2636, "step": 2735, "success_rate.epoch.env.abd": 0.9839357429718876, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9078787878787878, "success_rate.epoch.env.math": 0.9582517938682322, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7845719661335842, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650215177555082, "success_rate.epoch.global": 0.8779059125022881, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.996353598691385, "tokens_p.mean_in_band": 0.4625651041666667, "tokens_rate.above_band": 0.9870828848223897, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012917115177610334 }, { "epoch": 0.5837239028547081, "grad_norm": 136.88939730849296, "learning_rate": 3.9678839259381026e-07, "loss": 0.2627, "step": 2740, "success_rate.epoch.env.abd": 0.9839357429718876, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9081015719467956, "success_rate.epoch.env.math": 0.9583061889250815, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7847091932457786, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650591910497856, "success_rate.epoch.global": 0.8779462817467568, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959558823529412, "tokens_p.mean_in_band": 0.6921875, "tokens_rate.above_band": 0.8947368421052632, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10526315789473684 }, { "epoch": 0.5847890924584576, "grad_norm": 165.80382489585057, "learning_rate": 3.9677497282400245e-07, "loss": 0.268, "step": 2745, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9083232810615199, "success_rate.epoch.env.math": 0.9583875162548765, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7845433255269321, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650930958080042, "success_rate.epoch.global": 0.8779865037388291, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996546961325967, "tokens_p.mean_in_band": 0.5651041666666666, "tokens_rate.above_band": 0.9679144385026738, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03208556149732621 }, { "epoch": 0.585854282062207, "grad_norm": 141.3101173271051, "learning_rate": 3.967615262528587e-07, "loss": 0.4855, "step": 2750, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.908433734939759, "success_rate.epoch.env.math": 0.958414554905783, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7842129845866418, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651096124829188, "success_rate.epoch.global": 0.8778445294010558, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9997391001855288, "tokens_p.mean_in_band": 0.3701171875, "tokens_rate.above_band": 0.9853747714808044, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014625228519195612 }, { "epoch": 0.5869194716659566, "grad_norm": 141.42994281570122, "learning_rate": 3.967480528901788e-07, "loss": 0.4361, "step": 2755, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9073405535499398, "success_rate.epoch.env.math": 0.9585223590408296, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7841491841491841, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.865014232692716, "success_rate.epoch.global": 0.8777030710521534, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979378172588832, "tokens_p.mean_in_band": 0.5640243902439024, "tokens_rate.above_band": 0.9505428226779252, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04945717732207479 }, { "epoch": 0.587984661269706, "grad_norm": 48.812305725481735, "learning_rate": 3.9673455274578204e-07, "loss": 0.3194, "step": 2760, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9074519230769231, "success_rate.epoch.env.math": 0.9586028460543338, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7846511627906977, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650940506240447, "success_rate.epoch.global": 0.8779249047705423, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969135802469136, "tokens_p.mean_in_band": 0.611328125, "tokens_rate.above_band": 0.9759036144578314, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024096385542168676 }, { "epoch": 0.5890498508734555, "grad_norm": 62.55218594436803, "learning_rate": 3.967210258295072e-07, "loss": 0.2571, "step": 2765, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.907673860911271, "success_rate.epoch.env.math": 0.9587362991618311, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7849512308406874, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651536378051208, "success_rate.epoch.global": 0.8781459351801557, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956095041322314, "tokens_p.mean_in_band": 0.81201171875, "tokens_rate.above_band": 0.937984496124031, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06201550387596899 }, { "epoch": 0.590115040477205, "grad_norm": 212.9899388467896, "learning_rate": 3.967074721512126e-07, "loss": 0.3378, "step": 2770, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9078947368421053, "success_rate.epoch.env.math": 0.9588424437299036, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7852504638218923, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652260460226798, "success_rate.epoch.global": 0.8783661666365443, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9945977393617021, "tokens_p.mean_in_band": 0.755859375, "tokens_rate.above_band": 0.9947089947089947, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005291005291005291 }, { "epoch": 0.5911802300809544, "grad_norm": 145.69168191162092, "learning_rate": 3.966938917207761e-07, "loss": 0.2639, "step": 2775, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8944099378881988, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9080047789725209, "success_rate.epoch.env.math": 0.9589216944801027, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7849213691026827, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652133367646256, "success_rate.epoch.global": 0.8782247880209273, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9922680412371134, "tokens_p.mean_in_band": 0.34521484375, "tokens_rate.above_band": 0.9603960396039604, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039603960396039604 }, { "epoch": 0.5922454196847039, "grad_norm": 52.29601460066344, "learning_rate": 3.96680284548095e-07, "loss": 0.2678, "step": 2780, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9081145584725537, "success_rate.epoch.env.math": 0.9590268886043534, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7851201478743068, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8653325573306598, "success_rate.epoch.global": 0.8784440842787682, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972165991902834, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9959677419354839, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004032258064516129 }, { "epoch": 0.5933106092884534, "grad_norm": 121.97529309328436, "learning_rate": 3.966666506430861e-07, "loss": 0.3134, "step": 2785, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9082240762812872, "success_rate.epoch.env.math": 0.9591576260370134, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7849561605906784, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8654372424920208, "success_rate.epoch.global": 0.8784828330037749, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982443820224719, "tokens_p.mean_in_band": 0.58544921875, "tokens_rate.above_band": 0.978021978021978, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02197802197802198 }, { "epoch": 0.5943757988922028, "grad_norm": 30.028166788365052, "learning_rate": 3.9665299001568577e-07, "loss": 0.4069, "step": 2790, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9085510688836105, "success_rate.epoch.env.math": 0.9591576260370134, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7855499309710078, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8655373252832739, "success_rate.epoch.global": 0.8787008792391889, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976136363636363, "tokens_p.mean_in_band": 0.7903645833333334, "tokens_rate.above_band": 0.9892086330935251, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01079136690647482 }, { "epoch": 0.5954409884959523, "grad_norm": 126.42427937631184, "learning_rate": 3.9663930267584965e-07, "loss": 0.2259, "step": 2795, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9085510688836105, "success_rate.epoch.env.math": 0.9592356687898089, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7845659163987139, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8654922724297704, "success_rate.epoch.global": 0.8783807988536629, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9949433656957929, "tokens_p.mean_in_band": 0.5829503676470589, "tokens_rate.above_band": 0.9478527607361963, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05214723926380368 }, { "epoch": 0.5965061780997017, "grad_norm": 111.955382617761, "learning_rate": 3.96625588633553e-07, "loss": 0.4845, "step": 2800, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9074733096085409, "success_rate.epoch.env.math": 0.9587301587301588, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7845025217790005, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8653425757066399, "success_rate.epoch.global": 0.8780618630430895, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9966722129783694, "tokens_p.mean_in_band": 0.490625, "tokens_rate.above_band": 0.9161585365853658, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08384146341463415 }, { "epoch": 0.5975713677034512, "grad_norm": 28.612260925514917, "learning_rate": 3.9661184789879066e-07, "loss": 0.2549, "step": 2805, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9075829383886256, "success_rate.epoch.env.math": 0.958808618504436, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7849954254345837, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8654658262559678, "success_rate.epoch.global": 0.878279493128681, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975961538461539, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9970501474926253, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0029498525073746312 }, { "epoch": 0.5986365573072007, "grad_norm": 417.3717178730771, "learning_rate": 3.965980804815766e-07, "loss": 0.4059, "step": 2810, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9078014184397163, "success_rate.epoch.env.math": 0.958886780518659, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7853881278538812, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8655445178654543, "success_rate.epoch.global": 0.878496347764119, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947519083969466, "tokens_p.mean_in_band": 0.783203125, "tokens_rate.above_band": 0.9424460431654677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05755395683453238 }, { "epoch": 0.5997017469109501, "grad_norm": 120.7657315705206, "learning_rate": 3.9658428639194454e-07, "loss": 0.2757, "step": 2815, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9079102715466352, "success_rate.epoch.env.math": 0.9589646464646465, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7854214123006834, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8655702217466253, "success_rate.epoch.global": 0.8785345900764716, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99425, "tokens_p.mean_in_band": 0.71875, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 0.6007669365146996, "grad_norm": 68.14852446729229, "learning_rate": 3.9657046563994737e-07, "loss": 0.4115, "step": 2820, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9516728624535316, "success_rate.epoch.env.logic": 0.9080188679245284, "success_rate.epoch.env.math": 0.9590680100755667, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7851615839781521, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652741474067014, "success_rate.epoch.global": 0.8783951713119119, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9922632629777525, "tokens_p.mean_below_band": 3.0174851417541504e-07, "tokens_p.mean_in_band": 0.4756988463488844, "tokens_rate.above_band": 0.7798042704626335, "tokens_rate.below_band": 0.0008896797153024911, "tokens_rate.in_band": 0.21930604982206406 }, { "epoch": 0.601832126118449, "grad_norm": 77.47932183983724, "learning_rate": 3.9655661823565776e-07, "loss": 0.3536, "step": 2825, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9516728624535316, "success_rate.epoch.env.logic": 0.9084507042253521, "success_rate.epoch.env.math": 0.9591451917033312, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7853569804456572, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8653381850790735, "success_rate.epoch.global": 0.8786106680843523, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988425925925926, "tokens_p.mean_in_band": 0.7981770833333334, "tokens_rate.above_band": 0.989010989010989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01098901098901099 }, { "epoch": 0.6028973157221985, "grad_norm": 78.7612652563382, "learning_rate": 3.965427441891674e-07, "loss": 0.4226, "step": 2830, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9086651053864169, "success_rate.epoch.env.math": 0.9591964846202135, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7853901996370236, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8653816307761311, "success_rate.epoch.global": 0.878648505218468, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995083579154376, "tokens_p.mean_in_band": 0.56005859375, "tokens_rate.above_band": 0.9921951219512195, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007804878048780488 }, { "epoch": 0.603962505325948, "grad_norm": 83.16517769869331, "learning_rate": 3.965288435105877e-07, "loss": 0.3928, "step": 2835, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9087719298245615, "success_rate.epoch.env.math": 0.9592476489028213, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7854232684472612, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650546470255048, "success_rate.epoch.global": 0.8785096238742716, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951421113689095, "tokens_p.mean_in_band": 0.666610054347826, "tokens_rate.above_band": 0.9493392070484582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05066079295154185 }, { "epoch": 0.6050276949296974, "grad_norm": 32.080396169566875, "learning_rate": 3.9651491621004933e-07, "loss": 0.2439, "step": 2840, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9089848308051341, "success_rate.epoch.env.math": 0.9593241551939925, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7859078590785907, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651250104712389, "success_rate.epoch.global": 0.8787237793054821, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975393700787402, "tokens_p.mean_in_band": 0.757421875, "tokens_rate.above_band": 0.927007299270073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.072992700729927 }, { "epoch": 0.606092884533447, "grad_norm": 124.827136915955, "learning_rate": 3.9650096229770247e-07, "loss": 0.3579, "step": 2845, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.975, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9079254079254079, "success_rate.epoch.env.math": 0.959349593495935, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7865826204412427, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651506288735927, "success_rate.epoch.global": 0.8787612176667253, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963881909547738, "tokens_p.mean_in_band": 0.67626953125, "tokens_rate.above_band": 0.961352657004831, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03864734299516908 }, { "epoch": 0.6071580741371965, "grad_norm": 63.92735622924593, "learning_rate": 3.964869817837166e-07, "loss": 0.4637, "step": 2850, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9060324825986079, "success_rate.epoch.env.math": 0.9594003747657714, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7858749437696806, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649401677348707, "success_rate.epoch.global": 0.8782715615668365, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9987438474159147, "tokens_p.mean_in_band": 0.4264481707317073, "tokens_rate.above_band": 0.9674603174603175, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03253968253968254 }, { "epoch": 0.6082232637409459, "grad_norm": 80.6302485353995, "learning_rate": 3.964729746782805e-07, "loss": 0.4292, "step": 2855, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9520295202952029, "success_rate.epoch.env.logic": 0.9060324825986079, "success_rate.epoch.env.math": 0.9595519601742377, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7855215827338129, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649934085451899, "success_rate.epoch.global": 0.8782883198877587, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9993196661828737, "tokens_p.mean_in_band": 0.5947265625, "tokens_rate.above_band": 0.9942279942279942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005772005772005772 }, { "epoch": 0.6092884533446954, "grad_norm": 105.17318627883428, "learning_rate": 3.964589409916027e-07, "loss": 0.2579, "step": 2860, "success_rate.epoch.env.abd": 0.984313725490196, "success_rate.epoch.env.agentgym:alfworld": 0.8950617283950617, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9520295202952029, "success_rate.epoch.env.logic": 0.90625, "success_rate.epoch.env.math": 0.9596523898199876, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7853614728334082, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650133716481896, "success_rate.epoch.global": 0.8783263305322129, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9920280612244898, "tokens_p.mean_in_band": 0.7021484375, "tokens_rate.above_band": 0.9245283018867925, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07547169811320754 }, { "epoch": 0.6103536429484449, "grad_norm": 54.64373763484356, "learning_rate": 3.9644488073391063e-07, "loss": 0.3027, "step": 2865, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8957055214723927, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9522058823529411, "success_rate.epoch.env.logic": 0.9063583815028902, "success_rate.epoch.env.math": 0.9597024178549287, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7857463021066786, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651428869623689, "success_rate.epoch.global": 0.8785389723872772, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990821678321679, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9986033519553073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0013966480446927375 }, { "epoch": 0.6114188325521943, "grad_norm": 77.30734784913683, "learning_rate": 3.9643079391545137e-07, "loss": 0.429, "step": 2870, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8957055214723927, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9065743944636678, "success_rate.epoch.env.math": 0.9598021026592455, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7860340196956133, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652136583607363, "success_rate.epoch.global": 0.8787508722958828, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996875, "tokens_p.mean_in_band": 0.880859375, "tokens_rate.above_band": 0.9848484848484849, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015151515151515152 }, { "epoch": 0.6124840221559438, "grad_norm": 54.297149657348896, "learning_rate": 3.964166805464914e-07, "loss": 0.2485, "step": 2875, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9066820276497696, "success_rate.epoch.env.math": 0.9599012954966071, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7855227882037533, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652437980401767, "success_rate.epoch.global": 0.8786137234413096, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995933734939759, "tokens_p.mean_in_band": 0.5027901785714286, "tokens_rate.above_band": 0.9673659673659674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03263403263403263 }, { "epoch": 0.6135492117596932, "grad_norm": 66.39901014287504, "learning_rate": 3.9640254063731625e-07, "loss": 0.3462, "step": 2880, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.906789413118527, "success_rate.epoch.env.math": 0.96, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7859054415700267, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8653501128975711, "success_rate.epoch.global": 0.8788247566063978, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971177184466019, "tokens_p.mean_in_band": 0.763671875, "tokens_rate.above_band": 0.9363636363636364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06363636363636363 }, { "epoch": 0.6146144013634427, "grad_norm": 206.63351639348156, "learning_rate": 3.963883741982311e-07, "loss": 0.2259, "step": 2885, "success_rate.epoch.env.abd": 0.9844357976653697, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.906789413118527, "success_rate.epoch.env.math": 0.960098219766728, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7858414959928762, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8653742710553213, "success_rate.epoch.global": 0.8788615064213815, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984797297297298, "tokens_p.mean_in_band": 0.3671875, "tokens_rate.above_band": 0.9946236559139785, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005376344086021506 }, { "epoch": 0.6156795909671922, "grad_norm": 85.23586880115933, "learning_rate": 3.963741812395603e-07, "loss": 0.4086, "step": 2890, "success_rate.epoch.env.abd": 0.9844961240310077, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9070034443168772, "success_rate.epoch.env.math": 0.9601959583588487, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7861271676300579, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8654340681274387, "success_rate.epoch.global": 0.8790713790713791, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9888613861386139, "tokens_p.mean_in_band": 0.8489583333333334, "tokens_rate.above_band": 0.9439252336448598, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056074766355140186 }, { "epoch": 0.6167447805709416, "grad_norm": 64.94182937808755, "learning_rate": 3.9635996177164765e-07, "loss": 0.2705, "step": 2895, "success_rate.epoch.env.abd": 0.9845559845559846, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9059633027522935, "success_rate.epoch.env.math": 0.9596823457544288, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7860630270750111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652924286547955, "success_rate.epoch.global": 0.8787616741611899, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9966971544715447, "tokens_p.mean_in_band": 0.46015625, "tokens_rate.above_band": 0.924812030075188, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07518796992481203 }, { "epoch": 0.6178099701746911, "grad_norm": 66.8263466885697, "learning_rate": 3.9634571580485615e-07, "loss": 0.3927, "step": 2900, "success_rate.epoch.env.abd": 0.9845559845559846, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9061784897025171, "success_rate.epoch.env.math": 0.959731543624161, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.786631252766711, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8653681205194917, "success_rate.epoch.global": 0.8789709944751382, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9916424418604651, "tokens_p.mean_in_band": 0.776611328125, "tokens_rate.above_band": 0.9148936170212766, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0851063829787234 }, { "epoch": 0.6188751597784405, "grad_norm": 0.0, "learning_rate": 3.9633144334956816e-07, "loss": 0.2377, "step": 2905, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.906392694063927, "success_rate.epoch.env.math": 0.9597560975609756, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.786660777385159, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8654137091668251, "success_rate.epoch.global": 0.8790072388831437, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956042199488491, "tokens_p.mean_in_band": 0.5948350694444444, "tokens_rate.above_band": 0.9559902200488998, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044009779951100246 }, { "epoch": 0.61994034938219, "grad_norm": 87.09099619791554, "learning_rate": 3.963171444161853e-07, "loss": 0.2315, "step": 2910, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.906712172923777, "success_rate.epoch.env.math": 0.9591961023142509, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7865961199294532, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8654013171694078, "success_rate.epoch.global": 0.8788713007570543, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947115384615385, "tokens_p.mean_in_band": 0.6510416666666666, "tokens_rate.above_band": 0.9154929577464789, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08450704225352113 }, { "epoch": 0.6210055389859395, "grad_norm": 76.58118430055453, "learning_rate": 3.963028190151286e-07, "loss": 0.4663, "step": 2915, "success_rate.epoch.env.abd": 0.9847908745247148, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.906712172923777, "success_rate.epoch.env.math": 0.9592705167173252, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7869718309859155, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86545819129385, "success_rate.epoch.global": 0.8790793541738234, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957627118644068, "tokens_p.mean_in_band": 0.7613636363636364, "tokens_rate.above_band": 0.9147286821705426, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08527131782945736 }, { "epoch": 0.6220707285896889, "grad_norm": 126.88630377336375, "learning_rate": 3.9628846715683827e-07, "loss": 0.2585, "step": 2920, "success_rate.epoch.env.abd": 0.9847908745247148, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9528985507246377, "success_rate.epoch.env.logic": 0.9069239500567536, "success_rate.epoch.env.math": 0.9593199757134183, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7872527472527473, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8655539224731776, "success_rate.epoch.global": 0.879286694101509, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986881054897739, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9989247311827957, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001075268817204301 }, { "epoch": 0.6231359181934384, "grad_norm": 43.94399546987997, "learning_rate": 3.9627408885177384e-07, "loss": 0.2353, "step": 2925, "success_rate.epoch.env.abd": 0.9849056603773585, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9528985507246377, "success_rate.epoch.env.logic": 0.9070294784580499, "success_rate.epoch.env.math": 0.9593939393939394, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7871873628784555, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8655747309786477, "success_rate.epoch.global": 0.8793221499486478, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945913461538461, "tokens_p.mean_in_band": 0.5712890625, "tokens_rate.above_band": 0.8666666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13333333333333333 }, { "epoch": 0.6242011077971878, "grad_norm": 147.44890806931758, "learning_rate": 3.962596841104142e-07, "loss": 0.3198, "step": 2930, "success_rate.epoch.env.abd": 0.9849056603773585, "success_rate.epoch.env.agentgym:alfworld": 0.8963414634146342, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9071347678369196, "success_rate.epoch.env.math": 0.9595166163141994, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7870289219982471, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652683181103562, "success_rate.epoch.global": 0.8791866028708134, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9966773216689099, "tokens_p.mean_in_band": 0.6368680334394905, "tokens_rate.above_band": 0.9341994970662196, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06580050293378038 }, { "epoch": 0.6252662974009374, "grad_norm": 130.25080165861453, "learning_rate": 3.9624525294325727e-07, "loss": 0.2884, "step": 2935, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.891566265060241, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9071347678369196, "success_rate.epoch.env.math": 0.9595654797827399, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7871222076215506, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.864872543159927, "success_rate.epoch.global": 0.8792221084953941, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978813559322034, "tokens_p.mean_in_band": 0.65087890625, "tokens_rate.above_band": 0.9925233644859813, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007476635514018692 }, { "epoch": 0.6263314870046869, "grad_norm": 65.69813651884323, "learning_rate": 3.962307953608205e-07, "loss": 0.2149, "step": 2940, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9071347678369196, "success_rate.epoch.env.math": 0.9596871239470517, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7874945343244425, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649764772207669, "success_rate.epoch.global": 0.8794277929155313, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969951923076923, "tokens_p.mean_in_band": 0.74560546875, "tokens_rate.above_band": 0.9873417721518988, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012658227848101266 }, { "epoch": 0.6273966766084363, "grad_norm": 77.82909573964751, "learning_rate": 3.962163113736404e-07, "loss": 0.2132, "step": 2945, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9072398190045249, "success_rate.epoch.env.math": 0.9597113650030066, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7871783689489752, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649910468267976, "success_rate.epoch.global": 0.8792927575654539, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973190348525469, "tokens_p.mean_in_band": 0.5862379807692307, "tokens_rate.above_band": 0.966321243523316, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03367875647668394 }, { "epoch": 0.6284618662121858, "grad_norm": 124.50361686150308, "learning_rate": 3.9620180099227287e-07, "loss": 0.3018, "step": 2950, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9072398190045249, "success_rate.epoch.env.math": 0.959832134292566, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7875489769264258, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8650505913570208, "success_rate.epoch.global": 0.8794976238968092, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9924903100775194, "tokens_p.mean_in_band": 0.7513020833333334, "tokens_rate.above_band": 0.9555555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044444444444444446 }, { "epoch": 0.6295270558159353, "grad_norm": 101.09664789858942, "learning_rate": 3.961872642272929e-07, "loss": 0.4169, "step": 2955, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9064261555806088, "success_rate.epoch.env.math": 0.959832134292566, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7881024750325663, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651185821932349, "success_rate.epoch.global": 0.8795323619112165, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940232240437158, "tokens_p.mean_in_band": 0.6376378676470589, "tokens_rate.above_band": 0.915, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.085 }, { "epoch": 0.6305922454196847, "grad_norm": 50.78340309572184, "learning_rate": 3.9617270108929483e-07, "loss": 0.3334, "step": 2960, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9064261555806088, "success_rate.epoch.env.math": 0.959904248952723, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7882200086617583, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.865152232106874, "success_rate.epoch.global": 0.8795669824086604, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945652173913043, "tokens_p.mean_in_band": 0.6083333333333333, "tokens_rate.above_band": 0.9608355091383812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0391644908616188 }, { "epoch": 0.6316574350234342, "grad_norm": 89.86553292487679, "learning_rate": 3.9615811158889214e-07, "loss": 0.4859, "step": 2965, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9064261555806088, "success_rate.epoch.env.math": 0.9599521817095039, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7879965457685665, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.865150994557968, "success_rate.epoch.global": 0.8794326241134752, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9934505988023952, "tokens_p.mean_in_band": 0.4755108173076923, "tokens_rate.above_band": 0.9277777777777778, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07222222222222222 }, { "epoch": 0.6327226246271836, "grad_norm": 61.02038454128402, "learning_rate": 3.961434957367175e-07, "loss": 0.3879, "step": 2970, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9065315315315315, "success_rate.epoch.env.math": 0.9600238663484487, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.7884532529082292, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8652135795527743, "success_rate.epoch.global": 0.8796358732299393, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956995412844036, "tokens_p.mean_in_band": 0.6962890625, "tokens_rate.above_band": 0.9646017699115044, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035398230088495575 }, { "epoch": 0.6337878142309331, "grad_norm": 24.652628933243395, "learning_rate": 3.96128853543423e-07, "loss": 0.251, "step": 2975, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8922155688622755, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9057239057239057, "success_rate.epoch.env.math": 0.9601190476190476, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7882049074472665, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8648021384267214, "success_rate.epoch.global": 0.8793335577246718, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9986877916018663, "tokens_p.mean_in_band": 0.5513888888888889, "tokens_rate.above_band": 0.934593023255814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06540697674418605 }, { "epoch": 0.6348530038346826, "grad_norm": 92.98172277335975, "learning_rate": 3.9611418501967965e-07, "loss": 0.2789, "step": 2980, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9057239057239057, "success_rate.epoch.env.math": 0.960166468489893, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.7887505367110348, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8646088002077821, "success_rate.epoch.global": 0.8793682795698925, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958428899082569, "tokens_p.mean_in_band": 0.6463815789473685, "tokens_rate.above_band": 0.919831223628692, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08016877637130802 }, { "epoch": 0.635918193438432, "grad_norm": 211.65083339587707, "learning_rate": 3.9609949017617773e-07, "loss": 0.4691, "step": 2985, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.905829596412556, "success_rate.epoch.env.math": 0.9595959595959596, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.788865096359743, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8646915498118826, "success_rate.epoch.global": 0.8792351559879236, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9962060702875399, "tokens_p.mean_in_band": 0.4326171875, "tokens_rate.above_band": 0.9827315541601256, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01726844583987441 }, { "epoch": 0.6369833830421815, "grad_norm": 315.10729174699105, "learning_rate": 3.9608476902362684e-07, "loss": 0.4177, "step": 2990, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9060402684563759, "success_rate.epoch.env.math": 0.9596678529062871, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.7887986318939718, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8647274872989654, "success_rate.epoch.global": 0.87926992632284, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974922839506173, "tokens_p.mean_in_band": 0.5529513888888888, "tokens_rate.above_band": 0.9818181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01818181818181818 }, { "epoch": 0.638048572645931, "grad_norm": 62.47016863990783, "learning_rate": 3.960700215727556e-07, "loss": 0.4276, "step": 2995, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.950530035335689, "success_rate.epoch.env.logic": 0.9051339285714286, "success_rate.epoch.env.math": 0.9596917605216361, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.7886421861656704, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.864767317029398, "success_rate.epoch.global": 0.8791374122367102, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973591549295775, "tokens_p.mean_in_band": 0.5703125, "tokens_rate.above_band": 0.9848084544253632, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015191545574636724 }, { "epoch": 0.6391137622496804, "grad_norm": 70.06722331362305, "learning_rate": 3.9605524783431176e-07, "loss": 0.22, "step": 3000, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8947368421052632, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9507042253521126, "success_rate.epoch.env.logic": 0.9053452115812918, "success_rate.epoch.env.math": 0.9597394908229722, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7889125799573561, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671242230993002, "success_rate.epoch.global": 0.8793391188251002, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982576438848921, "tokens_p.mean_in_band": 0.8229166666666666, "tokens_rate.above_band": 0.9946332737030411, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005366726296958855 }, { "epoch": 0.6401789518534299, "grad_norm": 55.41039715773697, "learning_rate": 3.960404478190625e-07, "loss": 0.3152, "step": 3005, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.9055555555555556, "success_rate.epoch.env.math": 0.9597871082199881, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7891822827938672, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867248486025728, "success_rate.epoch.global": 0.8795401532822392, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996873262923847, "tokens_p.mean_in_band": 0.7981770833333334, "tokens_rate.above_band": 0.9983351831298557, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001664816870144284 }, { "epoch": 0.6412441414571793, "grad_norm": 50.43401833071595, "learning_rate": 3.960256215377938e-07, "loss": 0.3123, "step": 3010, "success_rate.epoch.env.abd": 0.9853479853479854, "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.9058693244739756, "success_rate.epoch.env.math": 0.9598346131128175, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7895408163265306, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673188201207135, "success_rate.epoch.global": 0.8797405189620758, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965679190751445, "tokens_p.mean_in_band": 0.76171875, "tokens_rate.above_band": 0.9885714285714285, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011428571428571429 }, { "epoch": 0.6423093310609288, "grad_norm": 69.76955269122864, "learning_rate": 3.9601076900131104e-07, "loss": 0.2167, "step": 3015, "success_rate.epoch.env.abd": 0.9853479853479854, "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.906284454244763, "success_rate.epoch.env.math": 0.9599056603773585, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7898089171974523, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673873908394634, "success_rate.epoch.global": 0.8799402191962803, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959077380952381, "tokens_p.mean_in_band": 0.72314453125, "tokens_rate.above_band": 0.9545454545454546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045454545454545456 }, { "epoch": 0.6433745206646783, "grad_norm": 173.826529122372, "learning_rate": 3.959958902204386e-07, "loss": 0.4126, "step": 3020, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.951048951048951, "success_rate.epoch.env.logic": 0.9063876651982379, "success_rate.epoch.env.math": 0.9599528857479388, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7901653242899533, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8674685111884716, "success_rate.epoch.global": 0.8801392572944297, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9943076208178439, "tokens_p.mean_in_band": 0.8450520833333334, "tokens_rate.above_band": 0.9889705882352942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011029411764705883 }, { "epoch": 0.6444397102684278, "grad_norm": 115.61607271610774, "learning_rate": 3.9598098520602007e-07, "loss": 0.3508, "step": 3025, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.951048951048951, "success_rate.epoch.env.logic": 0.9065934065934066, "success_rate.epoch.env.math": 0.9600235155790712, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7900973338975879, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675018734717671, "success_rate.epoch.global": 0.8801721284342933, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953703703703703, "tokens_p.mean_in_band": 0.52734375, "tokens_rate.above_band": 0.9854014598540146, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014598540145985401 }, { "epoch": 0.6455048998721773, "grad_norm": 67.72661895035077, "learning_rate": 3.9596605396891807e-07, "loss": 0.3197, "step": 3030, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9066959385290889, "success_rate.epoch.env.math": 0.9601173020527859, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7897631133671743, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675287308735146, "success_rate.epoch.global": 0.8802048909451421, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992937853107344, "tokens_p.mean_in_band": 0.43526785714285715, "tokens_rate.above_band": 0.9902097902097902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009790209790209791 }, { "epoch": 0.6465700894759268, "grad_norm": 175.8165967116826, "learning_rate": 3.9595109652001433e-07, "loss": 0.4834, "step": 3035, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8953488372093024, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9067982456140351, "success_rate.epoch.env.math": 0.9601873536299765, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.789873417721519, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675544275113765, "success_rate.epoch.global": 0.8802375453645661, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9901315789473685, "tokens_p.mean_in_band": 0.41573660714285715, "tokens_rate.above_band": 0.9313725490196079, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06862745098039216 }, { "epoch": 0.6476352790796762, "grad_norm": 93.66628429899336, "learning_rate": 3.959361128702099e-07, "loss": 0.3501, "step": 3040, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8901734104046243, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9059080962800875, "success_rate.epoch.env.math": 0.960233918128655, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7898947368421053, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867009182736889, "success_rate.epoch.global": 0.8799407114624506, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9992794296116505, "tokens_p.mean_in_band": 0.5702582465277778, "tokens_rate.above_band": 0.958139534883721, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04186046511627907 }, { "epoch": 0.6487004686834257, "grad_norm": 113.88749145748248, "learning_rate": 3.9592110303042457e-07, "loss": 0.3678, "step": 3045, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8901734104046243, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9050218340611353, "success_rate.epoch.env.math": 0.9603729603729604, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.790071518721077, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669573247281913, "success_rate.epoch.global": 0.8799736928641894, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0007148692810457, "tokens_p.mean_in_band": 0.4453125, "tokens_rate.above_band": 0.9429892141756548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05701078582434515 }, { "epoch": 0.6497656582871751, "grad_norm": 66.08946418028508, "learning_rate": 3.959060670115976e-07, "loss": 0.3665, "step": 3050, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8901734104046243, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9050218340611353, "success_rate.epoch.env.math": 0.9604421175101804, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.790180444817457, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8670238513727039, "success_rate.epoch.global": 0.8800065659881812, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962748344370861, "tokens_p.mean_in_band": 0.5110677083333334, "tokens_rate.above_band": 0.9263803680981595, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0736196319018405 }, { "epoch": 0.6508308478909246, "grad_norm": 161.5359544401479, "learning_rate": 3.958910048246869e-07, "loss": 0.3103, "step": 3055, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8908045977011494, "success_rate.epoch.env.agentgym:sciworld": 0.9695431472081218, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9051254089422028, "success_rate.epoch.env.math": 0.9605110336817654, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7904442581726739, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671350226282015, "success_rate.epoch.global": 0.8802032120616191, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987273755656109, "tokens_p.mean_in_band": 0.7272135416666666, "tokens_rate.above_band": 0.9910313901345291, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008968609865470852 }, { "epoch": 0.6518960374946741, "grad_norm": 78.9716837308335, "learning_rate": 3.9587591648066984e-07, "loss": 0.2134, "step": 3060, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8914285714285715, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9052287581699346, "success_rate.epoch.env.math": 0.9605797101449275, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7907949790794979, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8672532537931094, "success_rate.epoch.global": 0.8803992146596858, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987725040916531, "tokens_p.mean_in_band": 0.7825520833333334, "tokens_rate.above_band": 0.995114006514658, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004885993485342019 }, { "epoch": 0.6529612270984235, "grad_norm": 113.67140442021879, "learning_rate": 3.958608019905427e-07, "loss": 0.4321, "step": 3065, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8920454545454546, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9055374592833876, "success_rate.epoch.env.math": 0.9606709080393291, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7908824759514848, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673674861141013, "success_rate.epoch.global": 0.8805945769356419, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985023961661342, "tokens_p.mean_in_band": 0.802734375, "tokens_rate.above_band": 0.9750778816199377, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024922118380062305 }, { "epoch": 0.654026416702173, "grad_norm": 133.36748476007517, "learning_rate": 3.958456613653208e-07, "loss": 0.4466, "step": 3070, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8920454545454546, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.905742145178765, "success_rate.epoch.env.math": 0.9606709080393291, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.7914059240717564, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8646081689058666, "success_rate.epoch.global": 0.8806262230919765, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954173166926678, "tokens_p.mean_in_band": 0.6647135416666666, "tokens_rate.above_band": 0.946824224519941, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053175775480059084 }, { "epoch": 0.6550916063059224, "grad_norm": 62.85620699386314, "learning_rate": 3.958304946160384e-07, "loss": 0.336, "step": 3075, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8932584269662921, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.9058441558441559, "success_rate.epoch.env.math": 0.9606936416184971, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.791007493755204, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8643523082326928, "success_rate.epoch.global": 0.8803321393682839, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9985991379310345, "tokens_p.mean_in_band": 0.584703947368421, "tokens_rate.above_band": 0.9682804674457429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03171953255425709 }, { "epoch": 0.6561567959096719, "grad_norm": 75.00990374215259, "learning_rate": 3.95815301753749e-07, "loss": 0.2635, "step": 3080, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8932584269662921, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.906047516198704, "success_rate.epoch.env.math": 0.9607843137254902, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.790765391014975, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8643665076995899, "success_rate.epoch.global": 0.8803641092327699, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980085784313726, "tokens_p.mean_in_band": 0.4800347222222222, "tokens_rate.above_band": 0.9577464788732394, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04225352112676056 }, { "epoch": 0.6572219855134214, "grad_norm": 301.4585408016698, "learning_rate": 3.958000827895251e-07, "loss": 0.2665, "step": 3085, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8932584269662921, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.90625, "success_rate.epoch.env.math": 0.9608294930875576, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7907845579078456, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8644060562989491, "success_rate.epoch.global": 0.8803959753326842, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959435096153846, "tokens_p.mean_in_band": 0.7327008928571429, "tokens_rate.above_band": 0.9674418604651163, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03255813953488372 }, { "epoch": 0.6582871751171708, "grad_norm": 107.86347945791175, "learning_rate": 3.957848377344581e-07, "loss": 0.4075, "step": 3090, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8938547486033519, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9063509149623251, "success_rate.epoch.env.math": 0.9609419873635842, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7910447761194029, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8645033243977643, "success_rate.epoch.global": 0.8805897602073882, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946524064171123, "tokens_p.mean_in_band": 0.8151041666666666, "tokens_rate.above_band": 0.9842105263157894, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015789473684210527 }, { "epoch": 0.6593523647209203, "grad_norm": 39.961257713514556, "learning_rate": 3.957695665996586e-07, "loss": 0.2415, "step": 3095, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8938547486033519, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9065520945220193, "success_rate.epoch.env.math": 0.9609868043602984, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7910633016135705, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8645410767449845, "success_rate.epoch.global": 0.8806211582012293, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968971631205674, "tokens_p.mean_in_band": 0.57666015625, "tokens_rate.above_band": 0.9463087248322147, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053691275167785234 }, { "epoch": 0.6604175543246698, "grad_norm": 181.14104179507885, "learning_rate": 3.95754269396256e-07, "loss": 0.3315, "step": 3100, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8944444444444445, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9065520945220193, "success_rate.epoch.env.math": 0.9605488850771869, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7909090909090909, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8645408554589391, "success_rate.epoch.global": 0.8804909560723514, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972014925373134, "tokens_p.mean_in_band": 0.5950520833333334, "tokens_rate.above_band": 0.9710144927536232, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028985507246376812 }, { "epoch": 0.6614827439284192, "grad_norm": 139.8058433972585, "learning_rate": 3.9573894613539876e-07, "loss": 0.4565, "step": 3105, "success_rate.epoch.env.abd": 0.985663082437276, "success_rate.epoch.env.agentgym:alfworld": 0.8944444444444445, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9517241379310345, "success_rate.epoch.env.logic": 0.9055793991416309, "success_rate.epoch.env.math": 0.9606164383561644, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7908415841584159, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8644723070315984, "success_rate.epoch.global": 0.8803611738148984, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997501369112815, "tokens_p.mean_in_band": 0.5556640625, "tokens_rate.above_band": 0.9661375661375662, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033862433862433865 }, { "epoch": 0.6625479335321687, "grad_norm": 112.5701285692833, "learning_rate": 3.9572359682825435e-07, "loss": 0.2538, "step": 3110, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8944444444444445, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9517241379310345, "success_rate.epoch.env.logic": 0.9057815845824411, "success_rate.epoch.env.math": 0.9607061503416856, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7911001236093943, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8645270015910819, "success_rate.epoch.global": 0.8805537669027689, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951171875, "tokens_p.mean_in_band": 0.484375, "tokens_rate.above_band": 0.9896907216494846, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010309278350515464 }, { "epoch": 0.6636131231359182, "grad_norm": 101.16825630521417, "learning_rate": 3.957082214860094e-07, "loss": 0.4569, "step": 3115, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8950276243093923, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9517241379310345, "success_rate.epoch.env.logic": 0.9060832443970117, "success_rate.epoch.env.math": 0.9607731665719159, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7905349794238683, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8645621572023748, "success_rate.epoch.global": 0.8804243008678881, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986323851203501, "tokens_p.mean_in_band": 0.3902994791666667, "tokens_rate.above_band": 0.9870410367170627, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012958963282937365 }, { "epoch": 0.6646783127396677, "grad_norm": 35.52048006219275, "learning_rate": 3.956928201198691e-07, "loss": 0.1316, "step": 3120, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8950276243093923, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.952054794520548, "success_rate.epoch.env.logic": 0.9060832443970117, "success_rate.epoch.env.math": 0.9608399545970489, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7908792111750206, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8646342040393453, "success_rate.epoch.global": 0.8806161745827985, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989965596330275, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9984732824427481, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0015267175572519084 }, { "epoch": 0.6657435023434172, "grad_norm": 241.4843036383521, "learning_rate": 3.9567739274105814e-07, "loss": 0.2839, "step": 3125, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8956043956043956, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.952054794520548, "success_rate.epoch.env.logic": 0.9053191489361702, "success_rate.epoch.env.math": 0.9609065155807366, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7911366434140337, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8646466284990599, "success_rate.epoch.global": 0.880647228452419, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99581589958159, "tokens_p.mean_in_band": 0.741875, "tokens_rate.above_band": 0.9502982107355865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04970178926441352 }, { "epoch": 0.6668086919471666, "grad_norm": 100.44127427879963, "learning_rate": 3.9566193936081965e-07, "loss": 0.3357, "step": 3130, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8961748633879781, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9053191489361702, "success_rate.epoch.env.math": 0.9609507640067911, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7910692339205244, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8648058522770783, "success_rate.epoch.global": 0.880678182981446, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965356871678056, "tokens_p.mean_below_band": 2.8405338525772095e-08, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9984836997725549, "tokens_rate.below_band": 0.000758150113722517, "tokens_rate.in_band": 0.000758150113722517 }, { "epoch": 0.6678738815509161, "grad_norm": 91.8348046926915, "learning_rate": 3.9564645999041603e-07, "loss": 0.4371, "step": 3135, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8961748633879781, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9053191489361702, "success_rate.epoch.env.math": 0.961038961038961, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7906786590351594, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8647917975605721, "success_rate.epoch.global": 0.8805493452571064, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994860197368421, "tokens_p.mean_in_band": 0.49885110294117646, "tokens_rate.above_band": 0.8994082840236687, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10059171597633136 }, { "epoch": 0.6689390711546656, "grad_norm": 31.807500765998263, "learning_rate": 3.956309546411285e-07, "loss": 0.4623, "step": 3140, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9054197662061636, "success_rate.epoch.env.math": 0.9606299212598425, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7907642010625255, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8648228327028491, "success_rate.epoch.global": 0.8805803571428571, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8571428571428571, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985677083333333, "tokens_p.mean_in_band": 0.4677734375, "tokens_rate.above_band": 0.9836065573770492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01639344262295082 }, { "epoch": 0.670004260758415, "grad_norm": 29.05227497055417, "learning_rate": 3.956154233242573e-07, "loss": 0.1436, "step": 3145, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9055201698513801, "success_rate.epoch.env.math": 0.9607623318385651, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.7906122448979592, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8648301834355191, "success_rate.epoch.global": 0.8806112702960841, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925595238095238, "tokens_p.mean_in_band": 0.3203125, "tokens_rate.above_band": 0.9130434782608695, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08695652173913043 }, { "epoch": 0.6710694503621645, "grad_norm": 91.6137060369997, "learning_rate": 3.955998660511216e-07, "loss": 0.4673, "step": 3150, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9047619047619048, "success_rate.epoch.env.math": 0.9607843137254902, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.7909535452322738, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8644842879278625, "success_rate.epoch.global": 0.8804831532104259, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0004052593659942, "tokens_p.mean_in_band": 0.5079012784090909, "tokens_rate.above_band": 0.940379403794038, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05962059620596206 }, { "epoch": 0.6721346399659139, "grad_norm": 52.52254948832143, "learning_rate": 3.955842828330593e-07, "loss": 0.318, "step": 3155, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9714285714285714, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.904862579281184, "success_rate.epoch.env.math": 0.9608501118568232, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.7912087912087912, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8646284695286632, "success_rate.epoch.global": 0.8806728022849889, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972893432465924, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.9975278121137207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002472187886279357 }, { "epoch": 0.6731998295696634, "grad_norm": 84.86463189748122, "learning_rate": 3.9556867368142736e-07, "loss": 0.3367, "step": 3160, "success_rate.epoch.env.abd": 0.9858156028368794, "success_rate.epoch.env.agentgym:alfworld": 0.8967391304347826, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9714285714285714, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9050632911392406, "success_rate.epoch.env.math": 0.9609156895589056, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7910569105691057, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8643367484412887, "success_rate.epoch.global": 0.8805449936628644, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9932885906040269, "tokens_p.mean_in_band": 0.65375, "tokens_rate.above_band": 0.8563218390804598, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14367816091954022 }, { "epoch": 0.6742650191734129, "grad_norm": 107.55001791589437, "learning_rate": 3.955530386076017e-07, "loss": 0.2526, "step": 3165, "success_rate.epoch.env.abd": 0.9858657243816255, "success_rate.epoch.env.agentgym:alfworld": 0.8972972972972973, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9714285714285714, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9052631578947369, "success_rate.epoch.env.math": 0.9609375, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7909902597402597, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8644193121063275, "success_rate.epoch.global": 0.8805757671622905, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975678066037735, "tokens_p.mean_in_band": 0.5662109375, "tokens_rate.above_band": 0.9769585253456221, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02304147465437788 }, { "epoch": 0.6753302087771623, "grad_norm": 223.61082910181617, "learning_rate": 3.9553737762297687e-07, "loss": 0.6083, "step": 3170, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8983957219251337, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9053627760252366, "success_rate.epoch.env.math": 0.9609375, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7906034831915756, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8645697380680933, "success_rate.epoch.global": 0.8804485154769425, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986087328767124, "tokens_p.mean_in_band": 0.5404575892857143, "tokens_rate.above_band": 0.9765886287625418, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023411371237458192 }, { "epoch": 0.6763953983809118, "grad_norm": 146.32987137747477, "learning_rate": 3.955216907389667e-07, "loss": 0.3268, "step": 3175, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.898936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9046121593291404, "success_rate.epoch.env.math": 0.9610027855153204, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.790453074433657, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8645428933725422, "success_rate.epoch.global": 0.880321665089877, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963114754098361, "tokens_p.mean_in_band": 0.6072916666666667, "tokens_rate.above_band": 0.953125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046875 }, { "epoch": 0.6774605879846612, "grad_norm": 52.62957023119499, "learning_rate": 3.955059779670036e-07, "loss": 0.2357, "step": 3180, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.898936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9707317073170731, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9047120418848168, "success_rate.epoch.env.math": 0.9610678531701891, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7903030303030303, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8645662452960979, "success_rate.epoch.global": 0.8803526448362721, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995019920318725, "tokens_p.mean_in_band": 0.68212890625, "tokens_rate.above_band": 0.9920948616600791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007905138339920948 }, { "epoch": 0.6785257775884107, "grad_norm": 84.92924496189973, "learning_rate": 3.954902393185389e-07, "loss": 0.4566, "step": 3185, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9047120418848168, "success_rate.epoch.env.math": 0.9611111111111111, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.790641387656313, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8647105656753827, "success_rate.epoch.global": 0.8805219305140701, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987383540372671, "tokens_p.mean_in_band": 0.5364583333333334, "tokens_rate.above_band": 0.9953632148377125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00463678516228748 }, { "epoch": 0.6795909671921602, "grad_norm": 148.25663049601903, "learning_rate": 3.9547447480504283e-07, "loss": 0.3963, "step": 3190, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9527027027027027, "success_rate.epoch.env.logic": 0.9047120418848168, "success_rate.epoch.env.math": 0.9611111111111111, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7908286403861625, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8648410437293848, "success_rate.epoch.global": 0.8805525035316277, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980039491758241, "tokens_p.mean_in_band": 0.7965745192307693, "tokens_rate.above_band": 0.9911504424778761, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008849557522123894 }, { "epoch": 0.6806561567959096, "grad_norm": 29.239085193637013, "learning_rate": 3.9545868443800446e-07, "loss": 0.2416, "step": 3195, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.900523560209424, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9527027027027027, "success_rate.epoch.env.logic": 0.9049111807732497, "success_rate.epoch.env.math": 0.9611327040533038, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.790529695024077, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8648815297910178, "success_rate.epoch.global": 0.8804262654756307, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975665983606558, "tokens_p.mean_in_band": 0.5225694444444444, "tokens_rate.above_band": 0.9644268774703557, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03557312252964427 }, { "epoch": 0.6817213463996591, "grad_norm": 295.43416358103303, "learning_rate": 3.9544286822893164e-07, "loss": 0.3241, "step": 3200, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.900523560209424, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9528619528619529, "success_rate.epoch.env.logic": 0.9051094890510949, "success_rate.epoch.env.math": 0.9611542730299667, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7909491389667601, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8649541271870579, "success_rate.epoch.global": 0.8805946791862285, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999334094368341, "tokens_p.mean_in_band": 0.753125, "tokens_rate.above_band": 0.9924471299093656, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0075528700906344415 }, { "epoch": 0.6827865360034087, "grad_norm": 85.90754617058761, "learning_rate": 3.9542702618935114e-07, "loss": 0.3404, "step": 3205, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.9010416666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9530201342281879, "success_rate.epoch.env.logic": 0.9051094890510949, "success_rate.epoch.env.math": 0.9612403100775194, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7900839664134346, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8649447773977584, "success_rate.epoch.global": 0.8803125, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9992541766109785, "tokens_p.mean_in_band": 0.6747532894736842, "tokens_rate.above_band": 0.9778296382730455, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022170361726954493 }, { "epoch": 0.6838517256071581, "grad_norm": 47.95526373441156, "learning_rate": 3.954111583308086e-07, "loss": 0.3169, "step": 3210, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.9015544041450777, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9043659043659044, "success_rate.epoch.env.math": 0.9612831858407079, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7902516979624451, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8649616515030641, "success_rate.epoch.global": 0.8803432137285492, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982945810914681, "tokens_p.mean_in_band": 0.5130208333333334, "tokens_rate.above_band": 0.9886018237082067, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011398176291793313 }, { "epoch": 0.6849169152109076, "grad_norm": 866.405241175585, "learning_rate": 3.953952646648683e-07, "loss": 0.4581, "step": 3215, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.9015544041450777, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9043659043659044, "success_rate.epoch.env.math": 0.9613473219215903, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7899561578318055, "success_rate.epoch.env.webshop": 0.9705882352941176, "success_rate.epoch.env_macro_mean": 0.8650216389164633, "success_rate.epoch.global": 0.8802180685358255, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976027397260274, "tokens_p.mean_in_band": 0.4996995192307692, "tokens_rate.above_band": 0.9656084656084656, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03439153439153439 }, { "epoch": 0.685982104814657, "grad_norm": 114.41905403715027, "learning_rate": 3.9537934520311346e-07, "loss": 0.3582, "step": 3220, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, "success_rate.epoch.env.agentgym:sciworld": 0.9711538461538461, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9046632124352332, "success_rate.epoch.env.math": 0.9613899613899614, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7898089171974523, "success_rate.epoch.env.webshop": 0.9705882352941176, "success_rate.epoch.env_macro_mean": 0.8650979581736085, "success_rate.epoch.global": 0.880248833592535, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969798657718121, "tokens_p.mean_in_band": 0.5189732142857143, "tokens_rate.above_band": 0.9906914893617021, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009308510638297872 }, { "epoch": 0.6870472944184065, "grad_norm": 535.4498004762445, "learning_rate": 3.953633999571461e-07, "loss": 0.3363, "step": 3225, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, "success_rate.epoch.env.agentgym:sciworld": 0.9712918660287081, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9048603929679421, "success_rate.epoch.env.math": 0.9614325068870524, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7900596421471173, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8652358854763228, "success_rate.epoch.global": 0.8804347826086957, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996789383561644, "tokens_p.mean_in_band": 0.857421875, "tokens_rate.above_band": 0.9965870307167235, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0034129692832764505 }, { "epoch": 0.688112484022156, "grad_norm": 38.29977573786904, "learning_rate": 3.95347428938587e-07, "loss": 0.3186, "step": 3230, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, "success_rate.epoch.env.agentgym:sciworld": 0.9712918660287081, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9533333333333334, "success_rate.epoch.env.logic": 0.9048603929679421, "success_rate.epoch.env.math": 0.9614961496149615, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.790079365079365, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8652620218315472, "success_rate.epoch.global": 0.8804651162790698, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985981308411215, "tokens_p.mean_in_band": 0.7447916666666666, "tokens_rate.above_band": 0.9834558823529411, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016544117647058824 }, { "epoch": 0.6891776736259054, "grad_norm": 171.37582553234643, "learning_rate": 3.953314321590757e-07, "loss": 0.3422, "step": 3235, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9533333333333334, "success_rate.epoch.env.logic": 0.9049586776859504, "success_rate.epoch.env.math": 0.9615595826468973, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7904950495049505, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8653269407020374, "success_rate.epoch.global": 0.8806501547987616, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9935213414634146, "tokens_p.mean_in_band": 0.796875, "tokens_rate.above_band": 0.9704142011834319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029585798816568046 }, { "epoch": 0.6902428632296549, "grad_norm": 514.0063047416629, "learning_rate": 3.953154096302705e-07, "loss": 0.2892, "step": 3240, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9533333333333334, "success_rate.epoch.env.logic": 0.9050567595459237, "success_rate.epoch.env.math": 0.9616438356164384, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7905138339920948, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.865345224276279, "success_rate.epoch.global": 0.8806800618238022, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9939759036144579, "tokens_p.mean_in_band": 0.5625, "tokens_rate.above_band": 0.8736842105263158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12631578947368421 }, { "epoch": 0.6913080528334044, "grad_norm": 67.02260207537304, "learning_rate": 3.952993613638485e-07, "loss": 0.1984, "step": 3245, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, "success_rate.epoch.env.agentgym:sciworld": 0.9715639810426541, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9501661129568106, "success_rate.epoch.env.logic": 0.9053497942386831, "success_rate.epoch.env.math": 0.9616648411829135, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.790761942360837, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8651250483551053, "success_rate.epoch.global": 0.8807098765432099, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996900826446281, "tokens_p.mean_in_band": 0.6171875, "tokens_rate.above_band": 0.983739837398374, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016260162601626018 }, { "epoch": 0.6923732424371538, "grad_norm": 104.4845828693136, "learning_rate": 3.9528328737150573e-07, "loss": 0.1751, "step": 3250, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9503311258278145, "success_rate.epoch.env.logic": 0.9054470709146968, "success_rate.epoch.env.math": 0.9617277200656096, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7906151419558359, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8652217075315015, "success_rate.epoch.global": 0.8807395993836672, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982707509881423, "tokens_p.mean_in_band": 0.5966796875, "tokens_rate.above_band": 0.9768339768339769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023166023166023165 }, { "epoch": 0.6934384320409033, "grad_norm": 129.07205380525727, "learning_rate": 3.9526718766495663e-07, "loss": 0.6632, "step": 3255, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.9020618556701031, "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9504950495049505, "success_rate.epoch.env.logic": 0.9047131147540983, "success_rate.epoch.env.math": 0.9612445414847162, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.790862544308783, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8651484521941007, "success_rate.epoch.global": 0.8806153846153846, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945570570570571, "tokens_p.mean_in_band": 0.6122159090909091, "tokens_rate.above_band": 0.9680232558139535, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03197674418604651 }, { "epoch": 0.6945036216446527, "grad_norm": 77.98550794680186, "learning_rate": 3.9525106225593454e-07, "loss": 0.1984, "step": 3260, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.9025641025641026, "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.9050051072522982, "success_rate.epoch.env.math": 0.9612868047982552, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7909448818897638, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8653336443492431, "success_rate.epoch.global": 0.8807987711213517, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997093023255814, "tokens_p.mean_in_band": 0.7395833333333334, "tokens_rate.above_band": 0.996523754345307, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0034762456546929316 }, { "epoch": 0.6955688112484022, "grad_norm": 181.03549815716272, "learning_rate": 3.9523491115619166e-07, "loss": 0.284, "step": 3265, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.9030612244897959, "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9509803921568627, "success_rate.epoch.env.logic": 0.9051987767584098, "success_rate.epoch.env.math": 0.9608056614044638, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7911915060951632, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8653897345555275, "success_rate.epoch.global": 0.8808282208588957, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979979108635098, "tokens_p.mean_in_band": 0.7078125, "tokens_rate.above_band": 0.9862637362637363, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013736263736263736 }, { "epoch": 0.6966340008521517, "grad_norm": 48.56063784795593, "learning_rate": 3.9521873437749874e-07, "loss": 0.2744, "step": 3270, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.9030612244897959, "success_rate.epoch.env.agentgym:sciworld": 0.9716981132075472, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9509803921568627, "success_rate.epoch.env.logic": 0.9053916581892166, "success_rate.epoch.env.math": 0.9609120521172638, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7910447761194029, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8654036020253316, "success_rate.epoch.global": 0.8808575803981623, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936835106382979, "tokens_p.mean_in_band": 0.638671875, "tokens_rate.above_band": 0.8867924528301887, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11320754716981132 }, { "epoch": 0.6976991904559011, "grad_norm": 184.814521263067, "learning_rate": 3.9520253193164525e-07, "loss": 0.2215, "step": 3275, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.9030612244897959, "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9511400651465798, "success_rate.epoch.env.logic": 0.9046653144016227, "success_rate.epoch.env.math": 0.9609332609875203, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7912087912087912, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8654612208914592, "success_rate.epoch.global": 0.8808868501529052, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992125331564987, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.9856209150326798, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01437908496732026 }, { "epoch": 0.6987643800596506, "grad_norm": 215.02758437928475, "learning_rate": 3.951863038304395e-07, "loss": 0.4055, "step": 3280, "success_rate.epoch.env.abd": 0.9862542955326461, "success_rate.epoch.env.agentgym:alfworld": 0.9030612244897959, "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9512987012987013, "success_rate.epoch.env.logic": 0.9040404040404041, "success_rate.epoch.env.math": 0.9604550379198267, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7908983915260887, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8653514483479526, "success_rate.epoch.global": 0.8806106870229008, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.6833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9965625, "tokens_p.mean_below_band": 6.007030606269836e-08, "tokens_p.mean_in_band": 0.506103515625, "tokens_rate.above_band": 0.9823182711198428, "tokens_rate.below_band": 0.0019646365422396855, "tokens_rate.in_band": 0.015717092337917484 }, { "epoch": 0.6998295696634, "grad_norm": 161.25177275927612, "learning_rate": 3.9517005008570833e-07, "loss": 0.3882, "step": 3285, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8984771573604061, "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9512987012987013, "success_rate.epoch.env.logic": 0.9041372351160444, "success_rate.epoch.env.math": 0.9605191995673337, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.791226008617313, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8649834135311661, "success_rate.epoch.global": 0.880640243902439, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965572033898306, "tokens_p.mean_in_band": 0.656640625, "tokens_rate.above_band": 0.979253112033195, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02074688796680498 }, { "epoch": 0.7008947592671495, "grad_norm": 668.8095306826872, "learning_rate": 3.9515377070929745e-07, "loss": 0.3655, "step": 3290, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9044265593561368, "success_rate.epoch.env.math": 0.9605831533477321, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7913077525450274, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.865131951184332, "success_rate.epoch.global": 0.8808219178082192, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997614503816794, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9974619289340102, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0025380710659898475 }, { "epoch": 0.7019599488708991, "grad_norm": 104.10640438964306, "learning_rate": 3.951374657130711e-07, "loss": 0.2848, "step": 3295, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9045226130653267, "success_rate.epoch.env.math": 0.9606681034482759, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7912431587177482, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8648635144640626, "success_rate.epoch.global": 0.8806990881458967, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9855510752688172, "tokens_p.mean_in_band": 0.74072265625, "tokens_rate.above_band": 0.840867992766727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15913200723327306 }, { "epoch": 0.7030251384746485, "grad_norm": 223.18411783631583, "learning_rate": 3.951211351089122e-07, "loss": 0.4322, "step": 3300, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, "success_rate.epoch.env.agentgym:sciworld": 0.9719626168224299, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9046184738955824, "success_rate.epoch.env.math": 0.960752688172043, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7912602419040188, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.864881471622271, "success_rate.epoch.global": 0.8807283763277693, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930167597765364, "tokens_p.mean_in_band": 0.7779947916666666, "tokens_rate.above_band": 0.93717277486911, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06282722513089005 }, { "epoch": 0.704090328078398, "grad_norm": 82.82949553281563, "learning_rate": 3.951047789087224e-07, "loss": 0.2821, "step": 3305, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.898989898989899, "success_rate.epoch.env.agentgym:sciworld": 0.9720930232558139, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9047141424272819, "success_rate.epoch.env.math": 0.9608158883521203, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7916666666666666, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8649447170684355, "success_rate.epoch.global": 0.8809090909090909, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9942781690140845, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9726027397260274, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0273972602739726 }, { "epoch": 0.7051555176821475, "grad_norm": 179.93621902742274, "learning_rate": 3.950883971244221e-07, "loss": 0.2719, "step": 3310, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9720930232558139, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9039039039039038, "success_rate.epoch.env.math": 0.960857908847185, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7916018662519441, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8649608154836234, "success_rate.epoch.global": 0.880786686838124, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975349872773537, "tokens_p.mean_in_band": 0.3880208333333333, "tokens_rate.above_band": 0.9961977186311787, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0038022813688212928 }, { "epoch": 0.7062207072858969, "grad_norm": 101.51721690606249, "learning_rate": 3.9507198976795e-07, "loss": 0.2256, "step": 3315, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.903, "success_rate.epoch.env.math": 0.9603429796355841, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7920062087698875, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8649104118756781, "success_rate.epoch.global": 0.8806646525679759, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988149578651685, "tokens_p.mean_in_band": 0.3966796875, "tokens_rate.above_band": 0.99302649930265, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00697350069735007 }, { "epoch": 0.7072858968896464, "grad_norm": 119.79466163351181, "learning_rate": 3.9505555685126384e-07, "loss": 0.2835, "step": 3320, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9021956087824351, "success_rate.epoch.env.math": 0.960427807486631, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7922480620155039, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8648712339559489, "success_rate.epoch.global": 0.8806938159879336, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0006397637795275, "tokens_p.mean_in_band": 0.5681818181818182, "tokens_rate.above_band": 0.9665144596651446, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0334855403348554 }, { "epoch": 0.7083510864933958, "grad_norm": 93.6873784895068, "learning_rate": 3.950390983863398e-07, "loss": 0.4759, "step": 3325, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9723502304147466, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9488817891373802, "success_rate.epoch.env.logic": 0.9021956087824351, "success_rate.epoch.env.math": 0.9599358974358975, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7923433874709976, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.864861712502923, "success_rate.epoch.global": 0.8805722891566266, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961811531841652, "tokens_p.mean_in_band": 0.4685202205882353, "tokens_rate.above_band": 0.9715719063545151, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028428093645484948 }, { "epoch": 0.7094162760971453, "grad_norm": 92.5826567443287, "learning_rate": 3.950226143851727e-07, "loss": 0.2608, "step": 3330, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9723502304147466, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9490445859872612, "success_rate.epoch.env.logic": 0.9024875621890547, "success_rate.epoch.env.math": 0.9594882729211087, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7924236567452648, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8649343154637704, "success_rate.epoch.global": 0.8806015037593985, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974727838258165, "tokens_p.mean_in_band": 0.6740451388888888, "tokens_rate.above_band": 0.9861963190184049, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013803680981595092 }, { "epoch": 0.7104814657008948, "grad_norm": 133.45518713465225, "learning_rate": 3.950061048597758e-07, "loss": 0.354, "step": 3335, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9723502304147466, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9490445859872612, "success_rate.epoch.env.logic": 0.9024875621890547, "success_rate.epoch.env.math": 0.9595529536987759, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7921326648669494, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8649179630870055, "success_rate.epoch.global": 0.8804804804804804, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948326771653543, "tokens_p.mean_in_band": 0.707275390625, "tokens_rate.above_band": 0.8881118881118881, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11188811188811189 }, { "epoch": 0.7115466553046442, "grad_norm": 44.22495932755032, "learning_rate": 3.9498956982218126e-07, "loss": 0.223, "step": 3340, "success_rate.epoch.env.abd": 0.9865319865319865, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9723502304147466, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9490445859872612, "success_rate.epoch.env.logic": 0.9024875621890547, "success_rate.epoch.env.math": 0.9595959595959596, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.792147806004619, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8649357427000701, "success_rate.epoch.global": 0.8805097451274363, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.3984375, "tokens_rate.above_band": 0.9562841530054644, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04371584699453552 }, { "epoch": 0.7126118449083937, "grad_norm": 494.0600063833727, "learning_rate": 3.949730092844397e-07, "loss": 0.2885, "step": 3345, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9492063492063492, "success_rate.epoch.env.logic": 0.9017857142857143, "success_rate.epoch.env.math": 0.9596388741370154, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7923076923076923, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8649207194856502, "success_rate.epoch.global": 0.8805389221556886, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993990384615384, "tokens_p.mean_in_band": 0.4153225806451613, "tokens_rate.above_band": 0.991555434486516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008444565513484064 }, { "epoch": 0.7136770345121431, "grad_norm": 53.50826643689471, "learning_rate": 3.949564232586203e-07, "loss": 0.2906, "step": 3350, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.901980198019802, "success_rate.epoch.env.math": 0.9597030752916225, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7925470610833654, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8650265234466197, "success_rate.epoch.global": 0.8807174887892377, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993742638398115, "tokens_p.mean_in_band": 0.8203125, "tokens_rate.above_band": 0.9964788732394366, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0035211267605633804 }, { "epoch": 0.7147422241158926, "grad_norm": 99.39295881817814, "learning_rate": 3.9493981175681083e-07, "loss": 0.3975, "step": 3355, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.901980198019802, "success_rate.epoch.env.math": 0.9597883597883597, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7926408585665006, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8650428036266081, "success_rate.epoch.global": 0.8807462686567165, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9166666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9924768518518519, "tokens_p.mean_in_band": 0.6631944444444444, "tokens_rate.above_band": 0.9230769230769231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07692307692307693 }, { "epoch": 0.7158074137196421, "grad_norm": 298.4261496116819, "learning_rate": 3.9492317479111767e-07, "loss": 0.2755, "step": 3360, "success_rate.epoch.env.abd": 0.9866220735785953, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9011857707509882, "success_rate.epoch.env.math": 0.9598732840549102, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7928790199081164, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8650040355093257, "success_rate.epoch.global": 0.8807749627421758, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0011354660347551, "tokens_p.mean_in_band": 0.5427631578947368, "tokens_rate.above_band": 0.9708588957055214, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029141104294478526 }, { "epoch": 0.7168726033233915, "grad_norm": 24.211421703463163, "learning_rate": 3.9490651237366565e-07, "loss": 0.3115, "step": 3365, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9003944773175543, "success_rate.epoch.env.math": 0.9599578503688093, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7931166347992352, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8649654428602035, "success_rate.epoch.global": 0.8808035714285715, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960029069767442, "tokens_p.mean_in_band": 0.477734375, "tokens_rate.above_band": 0.9678456591639871, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03215434083601286 }, { "epoch": 0.717937792927141, "grad_norm": 123.13488311486617, "learning_rate": 3.948898245165982e-07, "loss": 0.4743, "step": 3370, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9003944773175543, "success_rate.epoch.env.math": 0.9595588235294118, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7928926251432938, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8645487671752087, "success_rate.epoch.global": 0.8805349182763744, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9989754098360656, "tokens_p.mean_below_band": 8.149072527885437e-09, "tokens_p.mean_in_band": 0.5138221153846154, "tokens_rate.above_band": 0.9751332149200711, "tokens_rate.below_band": 0.0017761989342806395, "tokens_rate.in_band": 0.023090586145648313 }, { "epoch": 0.7190029825308905, "grad_norm": 54.784791052820545, "learning_rate": 3.948731112320775e-07, "loss": 0.2882, "step": 3375, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9495268138801262, "success_rate.epoch.env.logic": 0.9006882989183874, "success_rate.epoch.env.math": 0.959601259181532, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7932086989698588, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8646225904799405, "success_rate.epoch.global": 0.8807121661721068, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985593971631206, "tokens_p.mean_in_band": 0.7109375, "tokens_rate.above_band": 0.986013986013986, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013986013986013986 }, { "epoch": 0.7200681721346399, "grad_norm": 369.140327671539, "learning_rate": 3.9485637253228387e-07, "loss": 0.4746, "step": 3380, "success_rate.epoch.env.abd": 0.9867549668874173, "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9495268138801262, "success_rate.epoch.env.logic": 0.900883218842002, "success_rate.epoch.env.math": 0.959643605870021, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7927619047619048, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8646115698094768, "success_rate.epoch.global": 0.8805925925925926, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942781690140845, "tokens_p.mean_in_band": 0.6019965277777778, "tokens_rate.above_band": 0.9403973509933775, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059602649006622516 }, { "epoch": 0.7211333617383894, "grad_norm": 76.21711864528496, "learning_rate": 3.948396084294164e-07, "loss": 0.5591, "step": 3385, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9495268138801262, "success_rate.epoch.env.logic": 0.9000979431929481, "success_rate.epoch.env.math": 0.9596858638743455, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7931558935361217, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8642837838164819, "success_rate.epoch.global": 0.8804733727810651, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9918769113149847, "tokens_p.mean_in_band": 0.24728265942353644, "tokens_rate.above_band": 0.1634182908545727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.8365817091454273 }, { "epoch": 0.722198551342139, "grad_norm": 132.382102030933, "learning_rate": 3.9482281893569267e-07, "loss": 0.2995, "step": 3390, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9498432601880877, "success_rate.epoch.env.logic": 0.9001956947162426, "success_rate.epoch.env.math": 0.9597280334728033, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.793168880455408, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8643264523936636, "success_rate.epoch.global": 0.8805022156573117, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969758064516129, "tokens_p.mean_in_band": 0.6006944444444444, "tokens_rate.above_band": 0.9897377423033067, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010262257696693273 }, { "epoch": 0.7232637409458884, "grad_norm": 138.13340499455256, "learning_rate": 3.948060040633488e-07, "loss": 0.316, "step": 3395, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, "success_rate.epoch.env.agentgym:sciworld": 0.9724770642201835, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9498432601880877, "success_rate.epoch.env.logic": 0.9002932551319648, "success_rate.epoch.env.math": 0.9597701149425287, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7937168811506434, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8643889653555438, "success_rate.epoch.global": 0.8806784660766962, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.991304347826087, "tokens_p.mean_in_band": 0.7490234375, "tokens_rate.above_band": 0.9349593495934959, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06504065040650407 }, { "epoch": 0.7243289305496379, "grad_norm": 99.86950913131045, "learning_rate": 3.9478916382463923e-07, "loss": 0.4035, "step": 3400, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8960396039603961, "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9498432601880877, "success_rate.epoch.env.logic": 0.9002932551319648, "success_rate.epoch.env.math": 0.9598540145985401, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7933509633547412, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8643747523888123, "success_rate.epoch.global": 0.880559646539028, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953663793103448, "tokens_p.mean_in_band": 0.5267857142857143, "tokens_rate.above_band": 0.9119496855345912, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0880503144654088 }, { "epoch": 0.7253941201533873, "grad_norm": 252.53708598592215, "learning_rate": 3.947722982318371e-07, "loss": 0.5432, "step": 3405, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9002932551319648, "success_rate.epoch.env.math": 0.9599167100468506, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7936627687665032, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8645157033198124, "success_rate.epoch.global": 0.8807352941176471, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979838709677419, "tokens_p.mean_in_band": 0.54296875, "tokens_rate.above_band": 0.9914712153518124, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008528784648187633 }, { "epoch": 0.7264593097571368, "grad_norm": 75.69768401986141, "learning_rate": 3.947554072972339e-07, "loss": 0.232, "step": 3410, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8975609756097561, "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9002932551319648, "success_rate.epoch.env.math": 0.96, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7940512048192772, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8646042376912912, "success_rate.epoch.global": 0.8809104258443465, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981235565819861, "tokens_p.mean_in_band": 0.70849609375, "tokens_rate.above_band": 0.9908466819221968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009153318077803204 }, { "epoch": 0.7275244993608863, "grad_norm": 110.31940769978746, "learning_rate": 3.947384910331396e-07, "loss": 0.2308, "step": 3415, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8975609756097561, "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.900390625, "success_rate.epoch.env.math": 0.9600415153087701, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7945925647765678, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8646660781580272, "success_rate.epoch.global": 0.8810850439882698, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9904891304347826, "tokens_p.mean_in_band": 0.8078125, "tokens_rate.above_band": 0.965034965034965, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03496503496503497 }, { "epoch": 0.7285896889646357, "grad_norm": 111.18847149318067, "learning_rate": 3.947215494518827e-07, "loss": 0.3533, "step": 3420, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9004878048780488, "success_rate.epoch.env.math": 0.9601036269430052, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7945256842894638, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8647338464382531, "success_rate.epoch.global": 0.881112737920937, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982538535645472, "tokens_p.mean_in_band": 0.57421875, "tokens_rate.above_band": 0.9904580152671756, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009541984732824428 }, { "epoch": 0.7296548785683852, "grad_norm": 51.36233429912145, "learning_rate": 3.9470458256581007e-07, "loss": 0.4076, "step": 3425, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, "success_rate.epoch.env.agentgym:sciworld": 0.9727272727272728, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9005847953216374, "success_rate.epoch.env.math": 0.9601449275362319, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7941616766467066, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8647893058881299, "success_rate.epoch.global": 0.8809941520467837, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.992505081300813, "tokens_p.mean_in_band": 0.60595703125, "tokens_rate.above_band": 0.9389312977099237, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061068702290076333 }, { "epoch": 0.7307200681721346, "grad_norm": 145.25772227247307, "learning_rate": 3.94687590387287e-07, "loss": 0.3291, "step": 3430, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, "success_rate.epoch.env.agentgym:sciworld": 0.9727272727272728, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9006815968841285, "success_rate.epoch.env.math": 0.9602478058853898, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7940956651718983, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8648014575642062, "success_rate.epoch.global": 0.881021897810219, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954166666666666, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9433962264150944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05660377358490566 }, { "epoch": 0.7317852577758841, "grad_norm": 232.9425916066108, "learning_rate": 3.946705729286974e-07, "loss": 0.2694, "step": 3435, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, "success_rate.epoch.env.agentgym:sciworld": 0.9727272727272728, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9008746355685131, "success_rate.epoch.env.math": 0.9603297269448737, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7940298507462686, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8648204707749553, "success_rate.epoch.global": 0.8810495626822158, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9935897435897436, "tokens_p.mean_in_band": 0.6338975694444444, "tokens_rate.above_band": 0.9285714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07142857142857142 }, { "epoch": 0.7328504473796336, "grad_norm": 141.46887205025453, "learning_rate": 3.9465353020244336e-07, "loss": 0.4536, "step": 3440, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8980582524271845, "success_rate.epoch.env.agentgym:sciworld": 0.9728506787330317, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9008746355685131, "success_rate.epoch.env.math": 0.9603705609881626, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7937453462397617, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8648144723464557, "success_rate.epoch.global": 0.8809315866084425, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997093023255814, "tokens_p.mean_in_band": 0.4880642361111111, "tokens_rate.above_band": 0.9828571428571429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017142857142857144 }, { "epoch": 0.733915636983383, "grad_norm": 246.5183921455988, "learning_rate": 3.946364622209456e-07, "loss": 0.6791, "step": 3445, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8995215311004785, "success_rate.epoch.env.agentgym:sciworld": 0.9728506787330317, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9000969932104753, "success_rate.epoch.env.math": 0.9604113110539846, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.7932316846411306, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8648338109630417, "success_rate.epoch.global": 0.8806686046511628, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7083333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.999771897810219, "tokens_p.mean_in_band": 0.4527622767857143, "tokens_rate.above_band": 0.9750889679715302, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02491103202846975 }, { "epoch": 0.7349808265871325, "grad_norm": 178.60062676322593, "learning_rate": 3.94619368996643e-07, "loss": 0.3199, "step": 3450, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8995215311004785, "success_rate.epoch.env.agentgym:sciworld": 0.9728506787330317, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9000969932104753, "success_rate.epoch.env.math": 0.9604925602873269, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7934621099554234, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8646287594990675, "success_rate.epoch.global": 0.8806966618287373, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987555309734514, "tokens_p.mean_in_band": 0.7517361111111112, "tokens_rate.above_band": 0.9617021276595744, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03829787234042553 }, { "epoch": 0.736046016190882, "grad_norm": 312.11935835420775, "learning_rate": 3.946022505419931e-07, "loss": 0.3558, "step": 3455, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8995215311004785, "success_rate.epoch.env.agentgym:sciworld": 0.9728506787330317, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9001937984496124, "success_rate.epoch.env.math": 0.9605734767025089, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7934742306266221, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8646460178922964, "success_rate.epoch.global": 0.8807246376811594, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9903846153846154, "tokens_p.mean_in_band": 0.680084228515625, "tokens_rate.above_band": 0.8666666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13333333333333333 }, { "epoch": 0.7371112057946314, "grad_norm": 283.3552891098637, "learning_rate": 3.945851068694716e-07, "loss": 0.3878, "step": 3460, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9001937984496124, "success_rate.epoch.env.math": 0.960613810741688, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7934863064396743, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8647667394217831, "success_rate.epoch.global": 0.8807525325615051, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966044142614601, "tokens_p.mean_in_band": 0.6845703125, "tokens_rate.above_band": 0.9865996649916248, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01340033500837521 }, { "epoch": 0.7381763953983809, "grad_norm": 57.614269926790676, "learning_rate": 3.945679379915728e-07, "loss": 0.3129, "step": 3465, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9730941704035875, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9005791505791506, "success_rate.epoch.env.math": 0.9606741573033708, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7936390532544378, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8648321615069284, "success_rate.epoch.global": 0.8809248554913295, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967672413793104, "tokens_p.mean_in_band": 0.818359375, "tokens_rate.above_band": 0.9775280898876404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02247191011235955 }, { "epoch": 0.7392415850021303, "grad_norm": 208.87748558830458, "learning_rate": 3.9455074392080924e-07, "loss": 0.4107, "step": 3470, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9730941704035875, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9005791505791506, "success_rate.epoch.env.math": 0.9602649006622517, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7935745937961596, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8648030816141303, "success_rate.epoch.global": 0.8808080808080808, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983850129198967, "tokens_p.mean_in_band": 0.353515625, "tokens_rate.above_band": 0.9699248120300752, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03007518796992481 }, { "epoch": 0.7403067746058798, "grad_norm": 97.87773724970161, "learning_rate": 3.945335246697118e-07, "loss": 0.4721, "step": 3475, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9732142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9005791505791506, "success_rate.epoch.env.math": 0.960285132382892, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7930780559646539, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.864770700632297, "success_rate.epoch.global": 0.8805475504322766, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9979016786570744, "tokens_p.mean_in_band": 0.5667067307692307, "tokens_rate.above_band": 0.9697674418604652, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030232558139534883 }, { "epoch": 0.7413719642096294, "grad_norm": 116.52193815172264, "learning_rate": 3.9451628025082966e-07, "loss": 0.2187, "step": 3480, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9004739336492891, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9006750241080038, "success_rate.epoch.env.math": 0.9603457041179461, "success_rate.epoch.env.sat": 0.12195121951219512, "success_rate.epoch.env.science": 0.7929385803604266, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8645489888541344, "success_rate.epoch.global": 0.880431654676259, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980377906976744, "tokens_p.mean_in_band": 0.6788194444444444, "tokens_rate.above_band": 0.9598214285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04017857142857143 }, { "epoch": 0.7424371538133788, "grad_norm": 146.8391565509642, "learning_rate": 3.9449901067673057e-07, "loss": 0.2319, "step": 3485, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9004739336492891, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9007707129094412, "success_rate.epoch.env.math": 0.9603859827323514, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7930275229357798, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8664699737028747, "success_rate.epoch.global": 0.8804597701149425, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979807692307693, "tokens_p.mean_in_band": 0.7337239583333334, "tokens_rate.above_band": 0.9908536585365854, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009146341463414634 }, { "epoch": 0.7435023434171283, "grad_norm": 87.83358489027319, "learning_rate": 3.9448171596000035e-07, "loss": 0.5532, "step": 3490, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9004739336492891, "success_rate.epoch.env.agentgym:sciworld": 0.973568281938326, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9008662175168431, "success_rate.epoch.env.math": 0.9604662949822605, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7932551319648093, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8665280077475415, "success_rate.epoch.global": 0.8806312769010043, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998126102292769, "tokens_p.mean_in_band": 0.8020833333333334, "tokens_rate.above_band": 0.984375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015625 }, { "epoch": 0.7445675330208777, "grad_norm": 140.63870216443541, "learning_rate": 3.9446439611324345e-07, "loss": 0.3322, "step": 3495, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, "success_rate.epoch.env.agentgym:sciworld": 0.973568281938326, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9009615384615385, "success_rate.epoch.env.math": 0.960546282245827, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7931918008784773, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8665808659020009, "success_rate.epoch.global": 0.8806590257879656, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952431289640592, "tokens_p.mean_in_band": 0.6541466346153846, "tokens_rate.above_band": 0.9732510288065843, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026748971193415638 }, { "epoch": 0.7456327226246272, "grad_norm": 41.51084693612499, "learning_rate": 3.9444705114908223e-07, "loss": 0.2664, "step": 3500, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, "success_rate.epoch.env.agentgym:sciworld": 0.973568281938326, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9506172839506173, "success_rate.epoch.env.logic": 0.9010566762728146, "success_rate.epoch.env.math": 0.9605662285136501, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7933552391383717, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8666200849933446, "success_rate.epoch.global": 0.8806866952789699, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958425720620843, "tokens_p.mean_in_band": 0.67578125, "tokens_rate.above_band": 0.986870897155361, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01312910284463895 }, { "epoch": 0.7466979122283767, "grad_norm": 76.42118417073381, "learning_rate": 3.944296810801577e-07, "loss": 0.2915, "step": 3505, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9011516314779271, "success_rate.epoch.env.math": 0.9606259464916709, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7930656934306569, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8684716907498451, "success_rate.epoch.global": 0.8807142857142857, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0001061120543293, "tokens_p.mean_in_band": 0.66845703125, "tokens_rate.above_band": 0.9932546374367622, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006745362563237774 }, { "epoch": 0.7477631018321261, "grad_norm": 209.20302871249308, "learning_rate": 3.9441228591912903e-07, "loss": 0.3736, "step": 3510, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9003831417624522, "success_rate.epoch.env.math": 0.9606854838709677, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7929274516952242, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8684416240604446, "success_rate.epoch.global": 0.8805991440798859, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0006471893491125, "tokens_p.mean_in_band": 0.6270559210526315, "tokens_rate.above_band": 0.9726618705035971, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027338129496402876 }, { "epoch": 0.7488282914358756, "grad_norm": 72.92206030945077, "learning_rate": 3.943948656786737e-07, "loss": 0.3632, "step": 3515, "success_rate.epoch.env.abd": 0.9837133550488599, "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9003831417624522, "success_rate.epoch.env.math": 0.9607448414695521, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7929403202328966, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8684578989520869, "success_rate.epoch.global": 0.8806267806267807, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9939983443708609, "tokens_p.mean_in_band": 0.29296875, "tokens_rate.above_band": 0.993421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006578947368421052 }, { "epoch": 0.749893481039625, "grad_norm": 43.01160491240064, "learning_rate": 3.943774203714874e-07, "loss": 0.4027, "step": 3520, "success_rate.epoch.env.abd": 0.9837662337662337, "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9005736137667304, "success_rate.epoch.env.math": 0.9607843137254902, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7925899019251725, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8684517539220749, "success_rate.epoch.global": 0.8805120910384068, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.990728021978022, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.883495145631068, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11650485436893204 }, { "epoch": 0.7509586706433745, "grad_norm": 52.05738905273373, "learning_rate": 3.9435995001028417e-07, "loss": 0.2509, "step": 3525, "success_rate.epoch.env.abd": 0.9838187702265372, "success_rate.epoch.env.agentgym:alfworld": 0.9009433962264151, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9008579599618685, "success_rate.epoch.env.math": 0.9608433734939759, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7928156748911466, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8685082735029749, "success_rate.epoch.global": 0.8806818181818182, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951388888888889, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9440559440559441, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055944055944055944 }, { "epoch": 0.752023860247124, "grad_norm": 53.87590904162749, "learning_rate": 3.9434245460779636e-07, "loss": 0.1966, "step": 3530, "success_rate.epoch.env.abd": 0.9838187702265372, "success_rate.epoch.env.agentgym:alfworld": 0.8967136150234741, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.90104662226451, "success_rate.epoch.env.math": 0.9609022556390977, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7927536231884058, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8681406109158915, "success_rate.epoch.global": 0.8805673758865248, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997877358490566, "tokens_p.mean_in_band": 0.641015625, "tokens_rate.above_band": 0.9464285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05357142857142857 }, { "epoch": 0.7530890498508734, "grad_norm": 77.96960488403518, "learning_rate": 3.9432493417677435e-07, "loss": 0.2345, "step": 3535, "success_rate.epoch.env.abd": 0.9838187702265372, "success_rate.epoch.env.agentgym:alfworld": 0.8967136150234741, "success_rate.epoch.env.agentgym:sciworld": 0.9737991266375546, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9001901140684411, "success_rate.epoch.env.math": 0.9609414121181773, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7926167209554832, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.8681795076621658, "success_rate.epoch.global": 0.8804532577903683, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8095238095238094, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9985699549887472, "tokens_p.mean_in_band": 0.6447916666666667, "tokens_rate.above_band": 0.9888724035608308, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01112759643916914 }, { "epoch": 0.7541542394546229, "grad_norm": 93.0625763483448, "learning_rate": 3.94307388729987e-07, "loss": 0.2656, "step": 3540, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.897196261682243, "success_rate.epoch.env.agentgym:sciworld": 0.9737991266375546, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.8995260663507109, "success_rate.epoch.env.math": 0.9609804902451226, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7924801156905278, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.8681588957820228, "success_rate.epoch.global": 0.8803394625176804, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982142857142857, "tokens_p.mean_in_band": 0.6884428879310345, "tokens_rate.above_band": 0.9476534296028881, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.052346570397111915 }, { "epoch": 0.7552194290583724, "grad_norm": 207.53699727916376, "learning_rate": 3.9428981828022126e-07, "loss": 0.3141, "step": 3545, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8976744186046511, "success_rate.epoch.env.agentgym:sciworld": 0.9739130434782609, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.8996212121212122, "success_rate.epoch.env.math": 0.961, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.792854565138939, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.8682706637544588, "success_rate.epoch.global": 0.8805084745762712, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965902278177458, "tokens_p.mean_in_band": 0.765625, "tokens_rate.above_band": 0.9904988123515439, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009501187648456057 }, { "epoch": 0.7562846186621218, "grad_norm": 29.11987209177875, "learning_rate": 3.9427222284028237e-07, "loss": 0.2818, "step": 3550, "success_rate.epoch.env.abd": 0.9839228295819936, "success_rate.epoch.env.agentgym:alfworld": 0.8976744186046511, "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.8999055712936733, "success_rate.epoch.env.math": 0.961038961038961, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7923576063446287, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.868269859554903, "success_rate.epoch.global": 0.8803949224259521, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976158038147139, "tokens_p.mean_in_band": 0.5552455357142857, "tokens_rate.above_band": 0.963254593175853, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03674540682414698 }, { "epoch": 0.7573498082658713, "grad_norm": 94.83104027104345, "learning_rate": 3.942546024229938e-07, "loss": 0.4543, "step": 3555, "success_rate.epoch.env.abd": 0.9839228295819936, "success_rate.epoch.env.agentgym:alfworld": 0.8976744186046511, "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9000942507068803, "success_rate.epoch.env.math": 0.9605788423153693, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7923713566030947, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8683047083357308, "success_rate.epoch.global": 0.8802816901408451, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997504752851711, "tokens_p.mean_in_band": 0.30282738095238093, "tokens_rate.above_band": 0.926056338028169, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07394366197183098 }, { "epoch": 0.7584149978696207, "grad_norm": 42.85649698328146, "learning_rate": 3.9423695704119713e-07, "loss": 0.6038, "step": 3560, "success_rate.epoch.env.abd": 0.9839743589743589, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.89924670433145, "success_rate.epoch.env.math": 0.96061814556331, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7923850574712644, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8682802280335982, "success_rate.epoch.global": 0.880168776371308, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0011784785435631, "tokens_p.mean_in_band": 0.60166015625, "tokens_rate.above_band": 0.9505562422744128, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049443757725587144 }, { "epoch": 0.7594801874733702, "grad_norm": 95.62170587598638, "learning_rate": 3.942192867077522e-07, "loss": 0.3187, "step": 3565, "success_rate.epoch.env.abd": 0.9840255591054313, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.8993414863593603, "success_rate.epoch.env.math": 0.960179193628671, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7927572606669057, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8682874310717786, "success_rate.epoch.global": 0.8801966292134832, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992655529953917, "tokens_p.mean_in_band": 0.6979166666666666, "tokens_rate.above_band": 0.9730941704035875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026905829596412557 }, { "epoch": 0.7605453770771198, "grad_norm": 82.75140417138097, "learning_rate": 3.94201591435537e-07, "loss": 0.2974, "step": 3570, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.974025974025974, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.8994360902255639, "success_rate.epoch.env.math": 0.9602977667493796, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7929057685417413, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8683249364242638, "success_rate.epoch.global": 0.8803646563814866, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9958389945652174, "tokens_p.mean_in_band": 0.77109375, "tokens_rate.above_band": 0.9865951742627346, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013404825737265416 }, { "epoch": 0.7616105666808692, "grad_norm": 89.97007715645253, "learning_rate": 3.9418387123744775e-07, "loss": 0.3633, "step": 3575, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8981481481481481, "success_rate.epoch.env.agentgym:sciworld": 0.9741379310344828, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.899624765478424, "success_rate.epoch.env.math": 0.9603174603174603, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7929924919556668, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8683619409009342, "success_rate.epoch.global": 0.8803921568627451, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961168639053254, "tokens_p.mean_in_band": 0.6439732142857143, "tokens_rate.above_band": 0.9602272727272727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03977272727272727 }, { "epoch": 0.7626757562846187, "grad_norm": 158.98763832371196, "learning_rate": 3.941661261263988e-07, "loss": 0.3576, "step": 3580, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8986175115207373, "success_rate.epoch.env.agentgym:sciworld": 0.9742489270386266, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.8987816307403936, "success_rate.epoch.env.math": 0.9603764239722635, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7929310960371296, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8683378311168405, "success_rate.epoch.global": 0.8802797202797202, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984730113636363, "tokens_p.mean_in_band": 0.71240234375, "tokens_rate.above_band": 0.9821428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017857142857142856 }, { "epoch": 0.7637409458883682, "grad_norm": 39.27060480238137, "learning_rate": 3.9414835611532267e-07, "loss": 0.3172, "step": 3585, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8986175115207373, "success_rate.epoch.env.agentgym:sciworld": 0.9744680851063829, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.898876404494382, "success_rate.epoch.env.math": 0.9604547701433515, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.7931526390870185, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8683936330298152, "success_rate.epoch.global": 0.8804469273743016, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985702614379085, "tokens_p.mean_in_band": 0.876953125, "tokens_rate.above_band": 0.9956616052060737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004338394793926247 }, { "epoch": 0.7648061354921176, "grad_norm": 57.03669704244732, "learning_rate": 3.9413056121716995e-07, "loss": 0.2352, "step": 3590, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, "success_rate.epoch.env.agentgym:sciworld": 0.9744680851063829, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9516616314199395, "success_rate.epoch.env.logic": 0.898876404494382, "success_rate.epoch.env.math": 0.9600394671928959, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7933000712758375, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8681019283213043, "success_rate.epoch.global": 0.8803347280334728, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980332167832168, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.984267453294002, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015732546705998034 }, { "epoch": 0.7658713250958671, "grad_norm": 124.93272759491029, "learning_rate": 3.941127414449096e-07, "loss": 0.7262, "step": 3595, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8990825688073395, "success_rate.epoch.env.agentgym:sciworld": 0.9744680851063829, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9519519519519519, "success_rate.epoch.env.logic": 0.8989710009354537, "success_rate.epoch.env.math": 0.9601377952755905, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7930911680911681, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8681268684914051, "success_rate.epoch.global": 0.8803621169916435, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981770833333333, "tokens_p.mean_in_band": 0.146484375, "tokens_rate.above_band": 0.995850622406639, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004149377593360996 }, { "epoch": 0.7669365146996165, "grad_norm": 149.49156079177754, "learning_rate": 3.940948968115283e-07, "loss": 0.4148, "step": 3600, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8949771689497716, "success_rate.epoch.env.agentgym:sciworld": 0.9744680851063829, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9519519519519519, "success_rate.epoch.env.logic": 0.8993476234855545, "success_rate.epoch.env.math": 0.9601769911504425, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7929562433297759, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8677791861101316, "success_rate.epoch.global": 0.880250347705146, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999371408045977, "tokens_p.mean_in_band": 0.36607142857142855, "tokens_rate.above_band": 0.9802816901408451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01971830985915493 }, { "epoch": 0.768001704303366, "grad_norm": 255.56765457368186, "learning_rate": 3.9407702733003125e-07, "loss": 0.3189, "step": 3605, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8954545454545455, "success_rate.epoch.env.agentgym:sciworld": 0.9745762711864406, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9519519519519519, "success_rate.epoch.env.logic": 0.8985102420856611, "success_rate.epoch.env.math": 0.9602746444335458, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7931034482758621, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8677785533295979, "success_rate.epoch.global": 0.8802777777777778, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985227272727273, "tokens_p.mean_in_band": 0.6287202380952381, "tokens_rate.above_band": 0.975177304964539, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024822695035460994 }, { "epoch": 0.7690668939071155, "grad_norm": 41.88641026449317, "learning_rate": 3.940591330134416e-07, "loss": 0.2524, "step": 3610, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9745762711864406, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9522388059701492, "success_rate.epoch.env.logic": 0.8986046511627906, "success_rate.epoch.env.math": 0.9602941176470589, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7933972310969116, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8675178138357101, "success_rate.epoch.global": 0.8803051317614424, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996012180974478, "tokens_p.mean_in_band": 0.691796875, "tokens_rate.above_band": 0.994232987312572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0057670126874279125 }, { "epoch": 0.7701320835108649, "grad_norm": 106.82604898741157, "learning_rate": 3.940412138748005e-07, "loss": 0.5378, "step": 3615, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8979591836734694, "success_rate.epoch.env.math": 0.9602941176470589, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.793690180786955, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8675130368116054, "success_rate.epoch.global": 0.8803324099722992, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9999093326885881, "tokens_p.mean_in_band": 0.6770833333333334, "tokens_rate.above_band": 0.9913710450623202, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00862895493767977 }, { "epoch": 0.7711972731146144, "grad_norm": 90.23979785986623, "learning_rate": 3.9402326992716743e-07, "loss": 0.3911, "step": 3620, "success_rate.epoch.env.abd": 0.9841772151898734, "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8980537534754403, "success_rate.epoch.env.math": 0.960352422907489, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7933474876150035, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8675003470800907, "success_rate.epoch.global": 0.8802213001383126, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9944526627218935, "tokens_p.mean_in_band": 0.6388888888888888, "tokens_rate.above_band": 0.949438202247191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05056179775280899 }, { "epoch": 0.7722624627183639, "grad_norm": 463.65678170863237, "learning_rate": 3.940053011836197e-07, "loss": 0.3113, "step": 3625, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9525222551928784, "success_rate.epoch.env.logic": 0.8982423681776133, "success_rate.epoch.env.math": 0.9604105571847508, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7934936350777935, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8675579574095476, "success_rate.epoch.global": 0.8803867403314917, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962962962962963, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.7733276523221133, "grad_norm": 1015.7081195335469, "learning_rate": 3.9398730765725285e-07, "loss": 0.4532, "step": 3630, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9525222551928784, "success_rate.epoch.env.logic": 0.8984302862419206, "success_rate.epoch.env.math": 0.9605070697220868, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7936395759717314, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8676357339153302, "success_rate.epoch.global": 0.880551724137931, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971590909090909, "tokens_p.mean_in_band": 0.625, "tokens_rate.above_band": 0.993660855784469, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006339144215530904 }, { "epoch": 0.7743928419258628, "grad_norm": 254.45479816578788, "learning_rate": 3.939692893611804e-07, "loss": 0.4188, "step": 3635, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.8984302862419206, "success_rate.epoch.env.math": 0.9605839416058394, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7936507936507936, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8676565117574598, "success_rate.epoch.global": 0.8805785123966943, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977532679738562, "tokens_p.mean_in_band": 0.66650390625, "tokens_rate.above_band": 0.9828693790149893, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017130620985010708 }, { "epoch": 0.7754580315296122, "grad_norm": 129.43309277805292, "learning_rate": 3.93951246308534e-07, "loss": 0.2792, "step": 3640, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.8985239852398524, "success_rate.epoch.env.math": 0.9606796116504854, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7939415287072913, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8677001575846486, "success_rate.epoch.global": 0.8807427785419533, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9939365671641791, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9710144927536232, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028985507246376812 }, { "epoch": 0.7765232211333617, "grad_norm": 76.38240990925108, "learning_rate": 3.939331785124632e-07, "loss": 0.3218, "step": 3645, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.8986175115207373, "success_rate.epoch.env.math": 0.9607938044530494, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7941590429275158, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8677388151577098, "success_rate.epoch.global": 0.8809065934065934, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9912790697674418, "tokens_p.mean_in_band": 0.8291015625, "tokens_rate.above_band": 0.9148936170212766, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0851063829787234 }, { "epoch": 0.7775884107371112, "grad_norm": 0.0, "learning_rate": 3.9391508598613586e-07, "loss": 0.1496, "step": 3650, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8878923766816144, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.8987108655616943, "success_rate.epoch.env.math": 0.9608506524891252, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7944483485593816, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8674706104846355, "success_rate.epoch.global": 0.8809327846364884, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986270920502092, "tokens_p.mean_in_band": 0.5, "tokens_rate.above_band": 0.9958333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004166666666666667 }, { "epoch": 0.7786536003408606, "grad_norm": 138.6709908629782, "learning_rate": 3.938969687427375e-07, "loss": 0.321, "step": 3655, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8883928571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.898989898989899, "success_rate.epoch.env.math": 0.9609261939218524, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7942415730337079, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8675295450115901, "success_rate.epoch.global": 0.880958904109589, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968184389140271, "tokens_p.mean_in_band": 0.61474609375, "tokens_rate.above_band": 0.9910313901345291, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008968609865470852 }, { "epoch": 0.7797187899446102, "grad_norm": 105.79139106977253, "learning_rate": 3.9387882679547194e-07, "loss": 0.3139, "step": 3660, "success_rate.epoch.env.abd": 0.9843260188087775, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.8990825688073395, "success_rate.epoch.env.math": 0.9610014443909485, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7940350877192982, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8676338887941548, "success_rate.epoch.global": 0.8809849521203831, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988567073170732, "tokens_p.mean_in_band": 0.578125, "tokens_rate.above_band": 0.9899396378269618, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01006036217303823 }, { "epoch": 0.7807839795483597, "grad_norm": 75.0752179318001, "learning_rate": 3.9386066015756085e-07, "loss": 0.2647, "step": 3665, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.8992673992673993, "success_rate.epoch.env.math": 0.9610576923076923, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7943237561317449, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8676865004287435, "success_rate.epoch.global": 0.8811475409836066, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959558823529412, "tokens_p.mean_in_band": 0.8546875, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 0.7818491691521091, "grad_norm": 180.31670923683598, "learning_rate": 3.9384246884224397e-07, "loss": 0.383, "step": 3670, "success_rate.epoch.env.abd": 0.9844236760124611, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9746835443037974, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.8992673992673993, "success_rate.epoch.env.math": 0.9610951008645533, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7943336831059811, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8677205428033484, "success_rate.epoch.global": 0.8811732605729877, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973507785467128, "tokens_p.mean_in_band": 0.5111177884615384, "tokens_rate.above_band": 0.9780033840947546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021996615905245348 }, { "epoch": 0.7829143587558586, "grad_norm": 61.70226454700537, "learning_rate": 3.93824252862779e-07, "loss": 0.1727, "step": 3675, "success_rate.epoch.env.abd": 0.9844236760124611, "success_rate.epoch.env.agentgym:alfworld": 0.8893805309734514, "success_rate.epoch.env.agentgym:sciworld": 0.9747899159663865, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.8993595608417201, "success_rate.epoch.env.math": 0.961169702780441, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.7945492662473794, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8678096664741446, "success_rate.epoch.global": 0.8813351498637603, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967261904761905, "tokens_p.mean_in_band": 0.498046875, "tokens_rate.above_band": 0.997624703087886, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0023752969121140144 }, { "epoch": 0.783979548359608, "grad_norm": 78.05984307283907, "learning_rate": 3.938060122324416e-07, "loss": 0.3464, "step": 3680, "success_rate.epoch.env.abd": 0.9844236760124611, "success_rate.epoch.env.agentgym:alfworld": 0.8898678414096917, "success_rate.epoch.env.agentgym:sciworld": 0.9747899159663865, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.8995433789954338, "success_rate.epoch.env.math": 0.9611883085769046, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.7949075688873387, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8675835467915104, "success_rate.epoch.global": 0.8813605442176871, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995492053789731, "tokens_p.mean_in_band": 0.44375, "tokens_rate.above_band": 0.964622641509434, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03537735849056604 }, { "epoch": 0.7850447379633575, "grad_norm": 169.5093349589927, "learning_rate": 3.9378774696452543e-07, "loss": 0.4289, "step": 3685, "success_rate.epoch.env.abd": 0.984472049689441, "success_rate.epoch.env.agentgym:alfworld": 0.8898678414096917, "success_rate.epoch.env.agentgym:sciworld": 0.9748953974895398, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.8998178506375227, "success_rate.epoch.env.math": 0.9612625538020086, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.7949790794979079, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8676357361258645, "success_rate.epoch.global": 0.8815217391304347, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985039893617021, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9947089947089947, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005291005291005291 }, { "epoch": 0.786109927567107, "grad_norm": 524.3044770124952, "learning_rate": 3.9376945707234207e-07, "loss": 0.354, "step": 3690, "success_rate.epoch.env.abd": 0.984472049689441, "success_rate.epoch.env.agentgym:alfworld": 0.8908296943231441, "success_rate.epoch.env.agentgym:sciworld": 0.9748953974895398, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.899909008189263, "success_rate.epoch.env.math": 0.9612995699952221, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.7948450017415535, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.867795396946783, "success_rate.epoch.global": 0.8815468113975576, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996159754224271, "tokens_p.mean_in_band": 0.333984375, "tokens_rate.above_band": 0.9938931297709923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0061068702290076335 }, { "epoch": 0.7871751171708564, "grad_norm": 98.16120192533465, "learning_rate": 3.937511425692211e-07, "loss": 0.2423, "step": 3695, "success_rate.epoch.env.abd": 0.9845201238390093, "success_rate.epoch.env.agentgym:alfworld": 0.8908296943231441, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9001814882032668, "success_rate.epoch.env.math": 0.961354961832061, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.794987817612252, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8678520664360164, "success_rate.epoch.global": 0.8817073170731707, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957959641255605, "tokens_p.mean_in_band": 0.796875, "tokens_rate.above_band": 0.9955357142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004464285714285714 }, { "epoch": 0.7882403067746059, "grad_norm": 69.69042512171194, "learning_rate": 3.9373280346851e-07, "loss": 0.3902, "step": 3700, "success_rate.epoch.env.abd": 0.9845201238390093, "success_rate.epoch.env.agentgym:alfworld": 0.8908296943231441, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9530791788856305, "success_rate.epoch.env.logic": 0.8994565217391305, "success_rate.epoch.env.math": 0.96141019533111, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.794577685088634, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8677664425202288, "success_rate.epoch.global": 0.8814614343707713, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9988610478359908, "tokens_p.mean_in_band": 0.5053125, "tokens_rate.above_band": 0.981371087928465, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018628912071535022 }, { "epoch": 0.7893054963783553, "grad_norm": 68.02646839305561, "learning_rate": 3.9371443978357404e-07, "loss": 0.3503, "step": 3705, "success_rate.epoch.env.abd": 0.9845679012345679, "success_rate.epoch.env.agentgym:alfworld": 0.8917748917748918, "success_rate.epoch.env.agentgym:sciworld": 0.9752066115702479, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9530791788856305, "success_rate.epoch.env.logic": 0.8986425339366516, "success_rate.epoch.env.math": 0.9614469300333175, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.7947203890239667, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.867817809543194, "success_rate.epoch.global": 0.8814864864864865, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978649068322981, "tokens_p.mean_in_band": 0.6979166666666666, "tokens_rate.above_band": 0.9889434889434889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011056511056511056 }, { "epoch": 0.7903706859821048, "grad_norm": 24.68769935889459, "learning_rate": 3.936960515277967e-07, "loss": 0.3215, "step": 3710, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9752066115702479, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.8986425339366516, "success_rate.epoch.env.math": 0.961465271170314, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.7944540727902947, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8678544632889186, "success_rate.epoch.global": 0.8813765182186235, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968619246861925, "tokens_p.mean_in_band": 0.54921875, "tokens_rate.above_band": 0.9795081967213115, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020491803278688523 }, { "epoch": 0.7914358755858543, "grad_norm": 212.9112060964346, "learning_rate": 3.936776387145792e-07, "loss": 0.2509, "step": 3715, "success_rate.epoch.env.abd": 0.9846625766871165, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.8987341772151899, "success_rate.epoch.env.math": 0.961520190023753, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.7947386638975424, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8679072247176475, "success_rate.epoch.global": 0.8815363881401618, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985632183908046, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.7925010651896037, "grad_norm": 78.58180947171188, "learning_rate": 3.9365920135734055e-07, "loss": 0.5729, "step": 3720, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.8988256549232159, "success_rate.epoch.env.math": 0.9615931721194879, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.7946058091286307, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8679185998002573, "success_rate.epoch.global": 0.8815612382234186, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9899088541666666, "tokens_p.mean_in_band": 0.609765625, "tokens_rate.above_band": 0.9504950495049505, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04950495049504951 }, { "epoch": 0.7935662547933532, "grad_norm": 99.52998609777296, "learning_rate": 3.936407394695179e-07, "loss": 0.4686, "step": 3725, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.898014440433213, "success_rate.epoch.env.math": 0.9616658778987222, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.794818652849741, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8678955391488354, "success_rate.epoch.global": 0.8815860215053763, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979134689178818, "tokens_p.mean_in_band": 0.566015625, "tokens_rate.above_band": 0.9848828420256992, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015117157974300832 }, { "epoch": 0.7946314443971026, "grad_norm": 119.00773937110698, "learning_rate": 3.9362225306456595e-07, "loss": 0.3206, "step": 3730, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.8981981981981982, "success_rate.epoch.env.math": 0.9617383089277279, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.7947568126940324, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.867913207206861, "success_rate.epoch.global": 0.8816107382550336, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925, "tokens_p.mean_in_band": 0.6253551136363636, "tokens_rate.above_band": 0.872093023255814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12790697674418605 }, { "epoch": 0.7956966340008521, "grad_norm": 129.37629931877842, "learning_rate": 3.9360374215595766e-07, "loss": 0.5105, "step": 3735, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.8974820143884892, "success_rate.epoch.env.math": 0.9613207547169811, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7946243969676086, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8675029363026625, "success_rate.epoch.global": 0.8812332439678284, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9972584575688074, "tokens_p.mean_below_band": 5.699694156646729e-07, "tokens_p.mean_in_band": 0.504856418918919, "tokens_rate.above_band": 0.9582417582417583, "tokens_rate.below_band": 0.001098901098901099, "tokens_rate.in_band": 0.04065934065934066 }, { "epoch": 0.7967618236046016, "grad_norm": 25.051683918926575, "learning_rate": 3.9358520675718355e-07, "loss": 0.3241, "step": 3740, "success_rate.epoch.env.abd": 0.9848024316109423, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.8974820143884892, "success_rate.epoch.env.math": 0.9609411764705882, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7945629731589814, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8674670573931125, "success_rate.epoch.global": 0.8811244979919679, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9916424418604651, "tokens_p.mean_in_band": 0.7161458333333334, "tokens_rate.above_band": 0.8514851485148515, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1485148514851485 }, { "epoch": 0.797827013208351, "grad_norm": 215.1764072248656, "learning_rate": 3.9356664688175215e-07, "loss": 0.4316, "step": 3745, "success_rate.epoch.env.abd": 0.9848024316109423, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9753086419753086, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.8960573476702509, "success_rate.epoch.env.math": 0.9609779031499764, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7945017182130584, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8673353123944966, "success_rate.epoch.global": 0.8808823529411764, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.99902496099844, "tokens_p.mean_in_band": 0.57421875, "tokens_rate.above_band": 0.9567164179104478, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04328358208955224 }, { "epoch": 0.7988922028121006, "grad_norm": 59.39626896344402, "learning_rate": 3.9354806254318967e-07, "loss": 0.3688, "step": 3750, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9754098360655737, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.8960573476702509, "success_rate.epoch.env.math": 0.9610328638497653, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7948542024013722, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8673857389595793, "success_rate.epoch.global": 0.881041388518024, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9954578488372093, "tokens_p.mean_in_band": 0.66640625, "tokens_rate.above_band": 0.9717514124293786, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02824858757062147 }, { "epoch": 0.7999573924158501, "grad_norm": 228.32473794319367, "learning_rate": 3.935294537550403e-07, "loss": 0.4245, "step": 3755, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.8961504028648165, "success_rate.epoch.env.math": 0.9611241217798595, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7947224126113777, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8673996381732817, "success_rate.epoch.global": 0.8810666666666667, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99775, "tokens_p.mean_in_band": 0.7295619419642857, "tokens_rate.above_band": 0.946969696969697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05303030303030303 }, { "epoch": 0.8010225820195995, "grad_norm": 150.2309611810832, "learning_rate": 3.9351082053086603e-07, "loss": 0.3381, "step": 3760, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8922413793103449, "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.8962432915921288, "success_rate.epoch.env.math": 0.961178671655753, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7947314403010606, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8674481418277703, "success_rate.epoch.global": 0.8810918774966711, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963235294117647, "tokens_p.mean_in_band": 0.6863839285714286, "tokens_rate.above_band": 0.974910394265233, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025089605734767026 }, { "epoch": 0.802087771623349, "grad_norm": 71.81975056934229, "learning_rate": 3.934921628842465e-07, "loss": 0.5153, "step": 3765, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.8963360142984808, "success_rate.epoch.env.math": 0.9611968209443665, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7943989071038251, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8667469037487247, "success_rate.epoch.global": 0.8807180851063829, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9991147308781869, "tokens_p.mean_in_band": 0.4549696180555556, "tokens_rate.above_band": 0.9514824797843666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04851752021563342 }, { "epoch": 0.8031529612270984, "grad_norm": 131.7346705630221, "learning_rate": 3.934734808287794e-07, "loss": 0.2724, "step": 3770, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8851063829787233, "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.8956289027653881, "success_rate.epoch.env.math": 0.9612330686595049, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7944084555063076, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8667314203803488, "success_rate.epoch.global": 0.8806108897742364, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0004664179104477, "tokens_p.mean_in_band": 0.5202907986111112, "tokens_rate.above_band": 0.9811715481171548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01882845188284519 }, { "epoch": 0.8042181508308479, "grad_norm": 58.02425940249168, "learning_rate": 3.9345477437808e-07, "loss": 0.3586, "step": 3775, "success_rate.epoch.env.abd": 0.9849397590361446, "success_rate.epoch.env.agentgym:alfworld": 0.885593220338983, "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.8957219251336899, "success_rate.epoch.env.math": 0.9612511671335201, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7947583390061266, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.866825885461263, "success_rate.epoch.global": 0.8807692307692307, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985829355608592, "tokens_p.mean_in_band": 0.7946428571428571, "tokens_rate.above_band": 0.9835680751173709, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01643192488262911 }, { "epoch": 0.8052833404345974, "grad_norm": 147.11832637493086, "learning_rate": 3.9343604354578144e-07, "loss": 0.2905, "step": 3780, "success_rate.epoch.env.abd": 0.9850299401197605, "success_rate.epoch.env.agentgym:alfworld": 0.885593220338983, "success_rate.epoch.env.agentgym:sciworld": 0.9755102040816327, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9538904899135446, "success_rate.epoch.env.logic": 0.8958147818343722, "success_rate.epoch.env.math": 0.9608391608391609, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7946276776606597, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8668053067907622, "success_rate.epoch.global": 0.8806622516556292, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972722960151803, "tokens_p.mean_in_band": 0.3949652777777778, "tokens_rate.above_band": 0.9669724770642202, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03302752293577982 }, { "epoch": 0.8063485300383468, "grad_norm": 66.76721594519827, "learning_rate": 3.9341728834553464e-07, "loss": 0.3325, "step": 3785, "success_rate.epoch.env.abd": 0.9850299401197605, "success_rate.epoch.env.agentgym:alfworld": 0.885593220338983, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9511494252873564, "success_rate.epoch.env.logic": 0.8959074733096085, "success_rate.epoch.env.math": 0.9609120521172638, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7944972826086957, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8665683681626817, "success_rate.epoch.global": 0.8805555555555555, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995216670997921, "tokens_p.mean_in_band": 0.5562855113636364, "tokens_rate.above_band": 0.8882733148661126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11172668513388735 }, { "epoch": 0.8074137196420963, "grad_norm": 336.6178712001766, "learning_rate": 3.933985087910082e-07, "loss": 0.5309, "step": 3790, "success_rate.epoch.env.abd": 0.9850299401197605, "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9511494252873564, "success_rate.epoch.env.logic": 0.8962765957446809, "success_rate.epoch.env.math": 0.9609483960948396, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7936884967763828, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8665755872179947, "success_rate.epoch.global": 0.8803170409511228, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9994365464632455, "tokens_p.mean_in_band": 0.5403262867647058, "tokens_rate.above_band": 0.9769647696476965, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023035230352303523 }, { "epoch": 0.8084789092458458, "grad_norm": 256.05312463976554, "learning_rate": 3.9337970489588857e-07, "loss": 0.4742, "step": 3795, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.8964601769911504, "success_rate.epoch.env.math": 0.9609846725499304, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7939681463910538, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.866637784223048, "success_rate.epoch.global": 0.8804749340369393, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977292387543253, "tokens_p.mean_in_band": 0.439453125, "tokens_rate.above_band": 0.9863481228668942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013651877133105802 }, { "epoch": 0.8095440988495952, "grad_norm": 86.39443396100542, "learning_rate": 3.933608766738799e-07, "loss": 0.2105, "step": 3800, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.896551724137931, "success_rate.epoch.env.math": 0.9610750695088045, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7941076870978666, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8666750626498273, "success_rate.epoch.global": 0.8806324110671937, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979319852941176, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9927007299270073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0072992700729927005 }, { "epoch": 0.8106092884533447, "grad_norm": 263.1065856297251, "learning_rate": 3.9334202413870406e-07, "loss": 0.3527, "step": 3805, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.8967343336275375, "success_rate.epoch.env.math": 0.961129106894956, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7937795807978364, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8666667481567116, "success_rate.epoch.global": 0.8805263157894737, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.98828125, "tokens_p.mean_in_band": 0.6473214285714286, "tokens_rate.above_band": 0.8627450980392157, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13725490196078433 }, { "epoch": 0.8116744780570941, "grad_norm": 117.03578843605969, "learning_rate": 3.933231473041006e-07, "loss": 0.365, "step": 3810, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9757085020242915, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.8968253968253969, "success_rate.epoch.env.math": 0.9611470860314524, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7931848852901484, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8662931194806004, "success_rate.epoch.global": 0.8801576872536137, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9983606557377049, "tokens_p.mean_in_band": 0.6310292119565217, "tokens_rate.above_band": 0.9298780487804879, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0701219512195122 }, { "epoch": 0.8127396676608436, "grad_norm": 235.85528454256533, "learning_rate": 3.9330424618382685e-07, "loss": 0.1631, "step": 3815, "success_rate.epoch.env.abd": 0.985207100591716, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9757085020242915, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.8969162995594714, "success_rate.epoch.env.math": 0.961218836565097, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7934636118598383, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8663372354516325, "success_rate.epoch.global": 0.8803149606299212, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9880725190839694, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9924242424242424, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007575757575757576 }, { "epoch": 0.8138048572645931, "grad_norm": 93.67931665339518, "learning_rate": 3.9328532079165786e-07, "loss": 0.491, "step": 3820, "success_rate.epoch.env.abd": 0.9852507374631269, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9757085020242915, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.8970070422535211, "success_rate.epoch.env.math": 0.9612724757952974, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7934746047763203, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8663553274254635, "success_rate.epoch.global": 0.8803407601572739, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9911123853211009, "tokens_p.mean_in_band": 0.6265625, "tokens_rate.above_band": 0.9159663865546218, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08403361344537816 }, { "epoch": 0.8148700468683425, "grad_norm": 125.4854821723632, "learning_rate": 3.9326637114138625e-07, "loss": 0.4263, "step": 3825, "success_rate.epoch.env.abd": 0.9853372434017595, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.8971880492091389, "success_rate.epoch.env.math": 0.9613259668508287, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7935440484196369, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8664123792735982, "success_rate.epoch.global": 0.8804973821989529, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9908900176678446, "tokens_p.mean_in_band": 0.88125, "tokens_rate.above_band": 0.9912434325744308, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008756567425569177 }, { "epoch": 0.815935236472092, "grad_norm": 82.51051557376586, "learning_rate": 3.9324739724682237e-07, "loss": 0.4545, "step": 3830, "success_rate.epoch.env.abd": 0.9853801169590644, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.8973684210526316, "success_rate.epoch.env.math": 0.9613437643810401, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.7938213566151779, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8665626320986436, "success_rate.epoch.global": 0.8806535947712418, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991883116883117, "tokens_p.mean_in_band": 0.84921875, "tokens_rate.above_band": 0.9908088235294118, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009191176470588236 }, { "epoch": 0.8170004260758414, "grad_norm": 197.3993672538796, "learning_rate": 3.9322839912179434e-07, "loss": 0.2086, "step": 3835, "success_rate.epoch.env.abd": 0.9854651162790697, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.8973684210526316, "success_rate.epoch.env.math": 0.9613793103448276, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7940288493794029, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8683214760185389, "success_rate.epoch.global": 0.8808093994778068, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996926883780332, "tokens_p.mean_in_band": 0.7890625, "tokens_rate.above_band": 0.9974522292993631, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0025477707006369425 }, { "epoch": 0.818065615679591, "grad_norm": 328.06152320211413, "learning_rate": 3.932093767801478e-07, "loss": 0.36, "step": 3840, "success_rate.epoch.env.abd": 0.9854651162790697, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.8976377952755905, "success_rate.epoch.env.math": 0.9614325068870524, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7942359249329759, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8684250580563859, "success_rate.epoch.global": 0.8809647979139504, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992897727272727, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.995475113122172, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004524886877828055 }, { "epoch": 0.8191308052833405, "grad_norm": 108.94169171376168, "learning_rate": 3.931903302357461e-07, "loss": 0.3125, "step": 3845, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.8978165938864628, "success_rate.epoch.env.math": 0.9614678899082569, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7945113788487282, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8685063862698439, "success_rate.epoch.global": 0.8811197916666667, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957414829659319, "tokens_p.mean_in_band": 0.8453125, "tokens_rate.above_band": 0.9900793650793651, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00992063492063492 }, { "epoch": 0.8201959948870899, "grad_norm": 74.44546940479921, "learning_rate": 3.931712595024703e-07, "loss": 0.3465, "step": 3850, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9758064516129032, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.8979057591623036, "success_rate.epoch.env.math": 0.9610627576729271, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7949231796927188, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8685150984411621, "success_rate.epoch.global": 0.8811443433029909, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9909274193548387, "tokens_p.mean_in_band": 0.6448863636363636, "tokens_rate.above_band": 0.9337349397590361, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06626506024096386 }, { "epoch": 0.8212611844908394, "grad_norm": 69.07461298775165, "learning_rate": 3.931521645942189e-07, "loss": 0.2801, "step": 3855, "success_rate.epoch.env.abd": 0.9855491329479769, "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, "success_rate.epoch.env.agentgym:sciworld": 0.9759036144578314, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.8979057591623036, "success_rate.epoch.env.math": 0.9611339734796525, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7947263017356475, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8686039688587598, "success_rate.epoch.global": 0.8811688311688312, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979042473919523, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9955489614243324, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004451038575667656 }, { "epoch": 0.8223263740945889, "grad_norm": 201.19100104991378, "learning_rate": 3.931330455249082e-07, "loss": 0.3657, "step": 3860, "success_rate.epoch.env.abd": 0.9855491329479769, "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, "success_rate.epoch.env.agentgym:sciworld": 0.9759036144578314, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9515669515669516, "success_rate.epoch.env.logic": 0.8979947689625108, "success_rate.epoch.env.math": 0.9611872146118722, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7944018660446518, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8685999866205609, "success_rate.epoch.global": 0.8810635538261997, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982585139318886, "tokens_p.mean_in_band": 0.5830965909090909, "tokens_rate.above_band": 0.9670658682634731, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03293413173652695 }, { "epoch": 0.8233915636983383, "grad_norm": 102.19170892886017, "learning_rate": 3.9311390230847195e-07, "loss": 0.3352, "step": 3865, "success_rate.epoch.env.abd": 0.9855907780979827, "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, "success_rate.epoch.env.agentgym:sciworld": 0.9759036144578314, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9515669515669516, "success_rate.epoch.env.logic": 0.8979947689625108, "success_rate.epoch.env.math": 0.9612579762989972, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7944111776447106, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8686110519330327, "success_rate.epoch.global": 0.8810880829015544, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995049504950495, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.926605504587156, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07339449541284404 }, { "epoch": 0.8244567533020878, "grad_norm": 79.29959450182675, "learning_rate": 3.930947349588618e-07, "loss": 0.3906, "step": 3870, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9515669515669516, "success_rate.epoch.env.logic": 0.8982608695652174, "success_rate.epoch.env.math": 0.9612932604735883, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7942154255319149, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8686649455718068, "success_rate.epoch.global": 0.8811125485122898, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966475095785441, "tokens_p.mean_in_band": 0.621875, "tokens_rate.above_band": 0.9936548223350253, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006345177664974619 }, { "epoch": 0.8255219429058372, "grad_norm": 119.10976458053719, "learning_rate": 3.930755434900465e-07, "loss": 0.2456, "step": 3875, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9517045454545454, "success_rate.epoch.env.logic": 0.8983492615117289, "success_rate.epoch.env.math": 0.9613636363636363, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7944887118193891, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.868716731754682, "success_rate.epoch.global": 0.881266149870801, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959747229916898, "tokens_p.mean_in_band": 0.7857142857142857, "tokens_rate.above_band": 0.99039780521262, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009602194787379973 }, { "epoch": 0.8265871325095867, "grad_norm": 32.38899961535907, "learning_rate": 3.9305632791601284e-07, "loss": 0.3963, "step": 3880, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8842975206611571, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9518413597733711, "success_rate.epoch.env.logic": 0.8984375, "success_rate.epoch.env.math": 0.9614162505674081, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7941663904540934, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.868712672267916, "success_rate.epoch.global": 0.8811612903225806, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999060621242485, "tokens_p.mean_in_band": 0.4990234375, "tokens_rate.above_band": 0.9950149551345963, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004985044865403789 }, { "epoch": 0.8276523221133362, "grad_norm": 124.78146306378007, "learning_rate": 3.93037088250765e-07, "loss": 0.4274, "step": 3885, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8847736625514403, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9518413597733711, "success_rate.epoch.env.logic": 0.8986135181975736, "success_rate.epoch.env.math": 0.9614512471655329, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7941760423560555, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8687760185031835, "success_rate.epoch.global": 0.8811855670103093, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975235849056604, "tokens_p.mean_in_band": 0.62939453125, "tokens_rate.above_band": 0.9706959706959707, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029304029304029304 }, { "epoch": 0.8287175117170856, "grad_norm": 116.54112186974726, "learning_rate": 3.930178245083246e-07, "loss": 0.6208, "step": 3890, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9518413597733711, "success_rate.epoch.env.logic": 0.8986135181975736, "success_rate.epoch.env.math": 0.9614861803352968, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7939233817701453, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8688028985212967, "success_rate.epoch.global": 0.8810810810810811, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976615646258503, "tokens_p.mean_in_band": 0.5653782894736842, "tokens_rate.above_band": 0.9586956521739131, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041304347826086954 }, { "epoch": 0.8297827013208351, "grad_norm": 192.17809651681378, "learning_rate": 3.9299853670273095e-07, "loss": 0.3392, "step": 3895, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.952112676056338, "success_rate.epoch.env.logic": 0.8986135181975736, "success_rate.epoch.env.math": 0.9615558570782451, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7935356200527705, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8687986468220729, "success_rate.epoch.global": 0.8809768637532134, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964378238341969, "tokens_p.mean_in_band": 0.5504261363636364, "tokens_rate.above_band": 0.9722921914357683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027707808564231738 }, { "epoch": 0.8308478909245846, "grad_norm": 128.3557990554127, "learning_rate": 3.9297922484804087e-07, "loss": 0.5831, "step": 3900, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.952112676056338, "success_rate.epoch.env.logic": 0.8987012987012987, "success_rate.epoch.env.math": 0.9615906009941256, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7926267281105991, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8687697392359254, "success_rate.epoch.global": 0.8806161745827985, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9958126550868487, "tokens_p.mean_in_band": 0.5268229166666667, "tokens_rate.above_band": 0.9641148325358851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03588516746411483 }, { "epoch": 0.831913080528334, "grad_norm": 74.77893847425175, "learning_rate": 3.929598889583288e-07, "loss": 0.4409, "step": 3905, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.952112676056338, "success_rate.epoch.env.logic": 0.8987889273356401, "success_rate.epoch.env.math": 0.9616771866546439, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7922419460881, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8687505967152306, "success_rate.epoch.global": 0.8805128205128205, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9925986842105263, "tokens_p.mean_in_band": 0.5979567307692307, "tokens_rate.above_band": 0.8976377952755905, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10236220472440945 }, { "epoch": 0.8329782701320835, "grad_norm": 40.35857743506659, "learning_rate": 3.9294052904768646e-07, "loss": 0.2629, "step": 3910, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.952247191011236, "success_rate.epoch.env.logic": 0.8990509059534081, "success_rate.epoch.env.math": 0.9613135402609086, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7923102201774564, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8687597895578022, "success_rate.epoch.global": 0.8805377720870678, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0001097775175645, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.9964994165694282, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003500583430571762 }, { "epoch": 0.8340434597358329, "grad_norm": 110.93402585348511, "learning_rate": 3.929211451302233e-07, "loss": 0.3101, "step": 3915, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.952247191011236, "success_rate.epoch.env.logic": 0.8984509466437177, "success_rate.epoch.env.math": 0.9609164420485176, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7922546767312111, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8686640985606817, "success_rate.epoch.global": 0.880306905370844, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6944444444444443, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9947321428571428, "tokens_p.mean_in_band": 0.5212204391891891, "tokens_rate.above_band": 0.9497964721845319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050203527815468114 }, { "epoch": 0.8351086493395824, "grad_norm": 86.40069497217755, "learning_rate": 3.9290173722006613e-07, "loss": 0.2324, "step": 3920, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.952247191011236, "success_rate.epoch.env.logic": 0.8986254295532646, "success_rate.epoch.env.math": 0.9609865470852018, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7924590163934426, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8687086313686105, "success_rate.epoch.global": 0.8804597701149425, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9937977099236641, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9776119402985075, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022388059701492536 }, { "epoch": 0.8361738389433319, "grad_norm": 136.23354918135206, "learning_rate": 3.928823053313593e-07, "loss": 0.3131, "step": 3925, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.976, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.952513966480447, "success_rate.epoch.env.logic": 0.8987124463519314, "success_rate.epoch.env.math": 0.9605734767025089, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7924713584288052, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8687043644522966, "success_rate.epoch.global": 0.8803571428571428, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0000736160188457, "tokens_p.mean_in_band": 0.6852463942307693, "tokens_rate.above_band": 0.9849187935034803, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015081206496519721 }, { "epoch": 0.8372390285470814, "grad_norm": 107.97998022558767, "learning_rate": 3.9286284947826466e-07, "loss": 0.3716, "step": 3930, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8861788617886179, "success_rate.epoch.env.agentgym:sciworld": 0.9760956175298805, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.952513966480447, "success_rate.epoch.env.logic": 0.8987993138936535, "success_rate.epoch.env.math": 0.9606263982102908, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7926749509483323, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.868839300385584, "success_rate.epoch.global": 0.8805095541401274, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977800546448088, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9891891891891892, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010810810810810811 }, { "epoch": 0.8383042181508309, "grad_norm": 90.9543254503011, "learning_rate": 3.9284336967496144e-07, "loss": 0.5249, "step": 3935, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8866396761133604, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.952513966480447, "success_rate.epoch.env.logic": 0.8987993138936535, "success_rate.epoch.env.math": 0.9606791778373548, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7928781443972558, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8689253417393826, "success_rate.epoch.global": 0.8806615776081425, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974922839506173, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.997946611909651, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002053388090349076 }, { "epoch": 0.8393694077545804, "grad_norm": 359.79030197159335, "learning_rate": 3.9282386593564645e-07, "loss": 0.321, "step": 3940, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8830645161290323, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.952513966480447, "success_rate.epoch.env.logic": 0.898972602739726, "success_rate.epoch.env.math": 0.9606967396159, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7929572872513857, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8686248720570572, "success_rate.epoch.global": 0.8805590851334181, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983753822629969, "tokens_p.mean_in_band": 0.6539713541666666, "tokens_rate.above_band": 0.9646017699115044, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035398230088495575 }, { "epoch": 0.8404345973583298, "grad_norm": 97.41800296755565, "learning_rate": 3.928043382745338e-07, "loss": 0.2942, "step": 3945, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8830645161290323, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9527777777777777, "success_rate.epoch.env.logic": 0.8982905982905983, "success_rate.epoch.env.math": 0.9607318161535029, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7932269619016606, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8686145591512464, "success_rate.epoch.global": 0.8805837563451777, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000073877068558, "tokens_p.mean_in_band": 0.5984002976190477, "tokens_rate.above_band": 0.9757785467128027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02422145328719723 }, { "epoch": 0.8414997869620793, "grad_norm": 101.13322897540706, "learning_rate": 3.927847867058552e-07, "loss": 0.4425, "step": 3950, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8835341365461847, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9527777777777777, "success_rate.epoch.env.logic": 0.8983774551665243, "success_rate.epoch.env.math": 0.9607843137254902, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7929128738621587, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8686413670444796, "success_rate.epoch.global": 0.8804816223067173, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952008928571429, "tokens_p.mean_in_band": 0.6100260416666666, "tokens_rate.above_band": 0.958904109589041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0410958904109589 }, { "epoch": 0.8425649765658287, "grad_norm": 45.307086293373196, "learning_rate": 3.9276521124385966e-07, "loss": 0.2964, "step": 3955, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8849206349206349, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9527777777777777, "success_rate.epoch.env.logic": 0.8976982097186701, "success_rate.epoch.env.math": 0.9608017817371938, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7928571428571428, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8687021843111417, "success_rate.epoch.global": 0.8803797468354431, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978929731925265, "tokens_p.mean_in_band": 0.4717741935483871, "tokens_rate.above_band": 0.9754358161648178, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02456418383518225 }, { "epoch": 0.8436301661695782, "grad_norm": 184.76332978411142, "learning_rate": 3.927456119028136e-07, "loss": 0.234, "step": 3960, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8814229249011858, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9529085872576177, "success_rate.epoch.env.logic": 0.8977853492333902, "success_rate.epoch.env.math": 0.9608540925266904, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7928015564202334, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8684037264318415, "success_rate.epoch.global": 0.8802781289506953, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9985655737704918, "tokens_p.mean_below_band": 4.6798959374427795e-08, "tokens_p.mean_in_band": 0.58203125, "tokens_rate.above_band": 0.9775641025641025, "tokens_rate.below_band": 0.003205128205128205, "tokens_rate.in_band": 0.019230769230769232 }, { "epoch": 0.8446953557733277, "grad_norm": 156.50849508386275, "learning_rate": 3.927259886970009e-07, "loss": 0.3372, "step": 3965, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8814229249011858, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9529085872576177, "success_rate.epoch.env.logic": 0.8977853492333902, "success_rate.epoch.env.math": 0.9608888888888889, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7932707861533485, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.868480156107479, "success_rate.epoch.global": 0.880429292929293, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9926581325301205, "tokens_p.mean_in_band": 0.72265625, "tokens_rate.above_band": 0.9880952380952381, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011904761904761904 }, { "epoch": 0.8457605453770771, "grad_norm": 61.34322900107282, "learning_rate": 3.9270634164072287e-07, "loss": 0.3148, "step": 3970, "success_rate.epoch.env.abd": 0.9857954545454546, "success_rate.epoch.env.agentgym:alfworld": 0.8814229249011858, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9529085872576177, "success_rate.epoch.env.logic": 0.8979591836734694, "success_rate.epoch.env.math": 0.9609409675987572, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.793148028442146, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8685435501152289, "success_rate.epoch.global": 0.880453972257251, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99811872909699, "tokens_p.mean_in_band": 0.5481770833333334, "tokens_rate.above_band": 0.9900662251655629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009933774834437087 }, { "epoch": 0.8468257349808266, "grad_norm": 141.95414518239207, "learning_rate": 3.92686670748298e-07, "loss": 0.4391, "step": 3975, "success_rate.epoch.env.abd": 0.9857954545454546, "success_rate.epoch.env.agentgym:alfworld": 0.8814229249011858, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9530386740331491, "success_rate.epoch.env.logic": 0.8980458793542906, "success_rate.epoch.env.math": 0.9601240584847143, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7934151065203358, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8685132729716379, "success_rate.epoch.global": 0.8803526448362721, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955533596837944, "tokens_p.mean_below_band": 4.602043190971017e-10, "tokens_p.mean_in_band": 0.72607421875, "tokens_rate.above_band": 0.9656488549618321, "tokens_rate.below_band": 0.003816793893129771, "tokens_rate.in_band": 0.030534351145038167 }, { "epoch": 0.847890924584576, "grad_norm": 169.26328746822526, "learning_rate": 3.9266697603406245e-07, "loss": 0.2742, "step": 3980, "success_rate.epoch.env.abd": 0.9858757062146892, "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9530386740331491, "success_rate.epoch.env.logic": 0.8982188295165394, "success_rate.epoch.env.math": 0.9601769911504425, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.7935483870967742, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8685956596945463, "success_rate.epoch.global": 0.8805031446540881, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989608076009501, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.9952718676122931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004728132387706856 }, { "epoch": 0.8489561141883255, "grad_norm": 100.70757867506678, "learning_rate": 3.9264725751236945e-07, "loss": 0.4345, "step": 3985, "success_rate.epoch.env.abd": 0.9858757062146892, "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9505494505494505, "success_rate.epoch.env.logic": 0.8974576271186441, "success_rate.epoch.env.math": 0.9602297834732656, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7931034482758621, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8679421440881406, "success_rate.epoch.global": 0.8800251256281407, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.36666666666666664, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9952293882978723, "tokens_p.mean_in_band": 0.686374470338983, "tokens_rate.above_band": 0.9409409409409409, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05905905905905906 }, { "epoch": 0.850021303792075, "grad_norm": 116.7782777363338, "learning_rate": 3.9262751519758984e-07, "loss": 0.3873, "step": 3990, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, "success_rate.epoch.env.agentgym:sciworld": 0.9762845849802372, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9505494505494505, "success_rate.epoch.env.logic": 0.8976311336717429, "success_rate.epoch.env.math": 0.9602649006622517, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7930479562278725, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8679632787844298, "success_rate.epoch.global": 0.8800501882057716, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964953271028038, "tokens_p.mean_in_band": 0.3697916666666667, "tokens_rate.above_band": 0.9727272727272728, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02727272727272727 }, { "epoch": 0.8510864933958244, "grad_norm": 43.19740481601878, "learning_rate": 3.926077491041116e-07, "loss": 0.2786, "step": 3995, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8818897637795275, "success_rate.epoch.env.agentgym:sciworld": 0.9763779527559056, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9505494505494505, "success_rate.epoch.env.logic": 0.8976311336717429, "success_rate.epoch.env.math": 0.9603174603174603, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7934468358496627, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8680128066983087, "success_rate.epoch.global": 0.8802005012531329, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9934593023255814, "tokens_p.mean_in_band": 0.78466796875, "tokens_rate.above_band": 0.9555555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044444444444444446 }, { "epoch": 0.8521516829995739, "grad_norm": 1092.510596362934, "learning_rate": 3.9258795924634016e-07, "loss": 0.2843, "step": 4000, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9764705882352941, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8977176669484361, "success_rate.epoch.env.math": 0.9603174603174603, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7937780628608082, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.868125878947933, "success_rate.epoch.global": 0.8803504380475594, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964699074074074, "tokens_p.mean_in_band": 0.7330729166666666, "tokens_rate.above_band": 0.989010989010989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01098901098901099 }, { "epoch": 0.8532168726033234, "grad_norm": 72.34301840217742, "learning_rate": 3.925681456386981e-07, "loss": 0.413, "step": 4005, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, "success_rate.epoch.env.agentgym:sciworld": 0.9764705882352941, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8962900505902193, "success_rate.epoch.env.math": 0.9603349493168797, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7940422805893658, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8681049363737113, "success_rate.epoch.global": 0.88025, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988313990973565, "tokens_p.mean_in_band": 0.5166311553030303, "tokens_rate.above_band": 0.9591836734693877, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04081632653061224 }, { "epoch": 0.8542820622070728, "grad_norm": 106.61756173721486, "learning_rate": 3.925483082956257e-07, "loss": 0.3229, "step": 4010, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, "success_rate.epoch.env.agentgym:sciworld": 0.9765625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9509536784741145, "success_rate.epoch.env.logic": 0.8962900505902193, "success_rate.epoch.env.math": 0.9604047514298284, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7936660268714012, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8680976151466802, "success_rate.epoch.global": 0.8801498127340824, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971543874172185, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9885433715220949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011456628477905073 }, { "epoch": 0.8553472518108223, "grad_norm": 58.6507490021795, "learning_rate": 3.9252844723158e-07, "loss": 0.2494, "step": 4015, "success_rate.epoch.env.abd": 0.9859943977591037, "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, "success_rate.epoch.env.agentgym:sciworld": 0.9765625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9510869565217391, "success_rate.epoch.env.logic": 0.8963774220724515, "success_rate.epoch.env.math": 0.9604221635883905, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7937420178799489, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8681297418990707, "success_rate.epoch.global": 0.8801745635910224, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916914682539683, "tokens_p.mean_in_band": 0.61328125, "tokens_rate.above_band": 0.9882352941176471, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011764705882352941 }, { "epoch": 0.8564124414145718, "grad_norm": 137.65795026171776, "learning_rate": 3.925085624610358e-07, "loss": 0.4212, "step": 4020, "success_rate.epoch.env.abd": 0.9859943977591037, "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, "success_rate.epoch.env.agentgym:sciworld": 0.9765625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9510869565217391, "success_rate.epoch.env.logic": 0.896551724137931, "success_rate.epoch.env.math": 0.9604916593503073, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.793939393939394, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8682178978184395, "success_rate.epoch.global": 0.8803237858032379, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964345637583892, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.9490445859872612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050955414012738856 }, { "epoch": 0.8574776310183213, "grad_norm": 203.70698780465727, "learning_rate": 3.924886539984848e-07, "loss": 0.3429, "step": 4025, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, "success_rate.epoch.env.agentgym:sciworld": 0.9765625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9510869565217391, "success_rate.epoch.env.logic": 0.896551724137931, "success_rate.epoch.env.math": 0.9605781865965834, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7942019751513221, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8682531914777278, "success_rate.epoch.global": 0.8804726368159204, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917929292929293, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.9801980198019802, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019801980198019802 }, { "epoch": 0.8585428206220708, "grad_norm": 89.44674044654744, "learning_rate": 3.9246872185843627e-07, "loss": 0.2971, "step": 4030, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8832684824902723, "success_rate.epoch.env.agentgym:sciworld": 0.9765625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.8966386554621849, "success_rate.epoch.env.math": 0.9601924759405074, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7944638880050907, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.868273875873324, "success_rate.epoch.global": 0.8804968944099378, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964303607214429, "tokens_p.mean_in_band": 0.4097222222222222, "tokens_rate.above_band": 0.9822834645669292, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017716535433070866 }, { "epoch": 0.8596080102258202, "grad_norm": 578.7139223438281, "learning_rate": 3.9244876605541657e-07, "loss": 0.2879, "step": 4035, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8837209302325582, "success_rate.epoch.env.agentgym:sciworld": 0.9765625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.8966386554621849, "success_rate.epoch.env.math": 0.959825327510917, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7941550190597204, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8682535513612627, "success_rate.epoch.global": 0.880272952853598, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.7833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9986111111111111, "tokens_p.mean_in_band": 0.44389204545454547, "tokens_rate.above_band": 0.9703504043126685, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029649595687331536 }, { "epoch": 0.8606731998295697, "grad_norm": 74.01880630776523, "learning_rate": 3.924287866039694e-07, "loss": 0.3464, "step": 4040, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8841698841698842, "success_rate.epoch.env.agentgym:sciworld": 0.9766536964980544, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.8966386554621849, "success_rate.epoch.env.math": 0.959895379250218, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7944162436548223, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8683327719766976, "success_rate.epoch.global": 0.8804213135068154, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967657930107527, "tokens_p.mean_in_band": 0.7330729166666666, "tokens_rate.above_band": 0.992, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008 }, { "epoch": 0.8617383894333192, "grad_norm": 92.83759227187693, "learning_rate": 3.924087835186555e-07, "loss": 0.1937, "step": 4045, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.8959731543624161, "success_rate.epoch.env.math": 0.9599303135888502, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7943599493029151, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8683190564204171, "success_rate.epoch.global": 0.8803217821782178, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970163316582915, "tokens_p.mean_in_band": 0.6305803571428571, "tokens_rate.above_band": 0.9770867430441899, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022913256955810146 }, { "epoch": 0.8628035790370686, "grad_norm": 129.2356792180417, "learning_rate": 3.923887568140532e-07, "loss": 0.2815, "step": 4050, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.8960603520536463, "success_rate.epoch.env.math": 0.9595827900912647, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7945552389996834, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8683166807600233, "success_rate.epoch.global": 0.8803461063040791, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975887345679012, "tokens_p.mean_in_band": 0.662890625, "tokens_rate.above_band": 0.9418604651162791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05813953488372093 }, { "epoch": 0.8638687686408181, "grad_norm": 447.11940720490844, "learning_rate": 3.9236870650475755e-07, "loss": 0.2807, "step": 4055, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9730769230769231, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.8961474036850922, "success_rate.epoch.env.math": 0.9596529284164859, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7946852261942423, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.868021320997719, "success_rate.epoch.global": 0.8803703703703704, "success_rate.window.env.agentgym:sciworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970817120622568, "tokens_p.mean_in_band": 0.5904947916666666, "tokens_rate.above_band": 0.9941972920696325, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005802707930367505 }, { "epoch": 0.8649339582445675, "grad_norm": 418.50623340237587, "learning_rate": 3.9234863260538133e-07, "loss": 0.4336, "step": 4060, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8812260536398467, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.8963210702341137, "success_rate.epoch.env.math": 0.9596879063719116, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7943127962085308, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8677076880063215, "success_rate.epoch.global": 0.8801479654747225, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975684438040345, "tokens_p.mean_in_band": 0.659912109375, "tokens_rate.above_band": 0.9774647887323944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022535211267605635 }, { "epoch": 0.865999147848317, "grad_norm": 78.41672080751667, "learning_rate": 3.9232853513055403e-07, "loss": 0.3121, "step": 4065, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8816793893129771, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.8964076858813701, "success_rate.epoch.env.math": 0.9596879063719116, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7945724203218681, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8675240904139364, "success_rate.epoch.global": 0.8801724137931034, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998042656587473, "tokens_p.mean_in_band": 0.7019230769230769, "tokens_rate.above_band": 0.9726890756302521, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0273109243697479 }, { "epoch": 0.8670643374520665, "grad_norm": 60.0318132659138, "learning_rate": 3.923084140949227e-07, "loss": 0.3531, "step": 4070, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8816793893129771, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.8967527060782681, "success_rate.epoch.env.math": 0.9597053726169844, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7948960302457467, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8675864628108318, "success_rate.epoch.global": 0.8803198031980319, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9891732283464567, "tokens_p.mean_in_band": 0.8234375, "tokens_rate.above_band": 0.927007299270073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.072992700729927 }, { "epoch": 0.8681295270558159, "grad_norm": 139.29665278406733, "learning_rate": 3.9228826951315135e-07, "loss": 0.3499, "step": 4075, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8816793893129771, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.8967527060782681, "success_rate.epoch.env.math": 0.9593777009507347, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7942101950912523, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8674943258271278, "success_rate.epoch.global": 0.87997542997543, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.5416666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9901315789473685, "tokens_p.mean_in_band": 0.5724909855769231, "tokens_rate.above_band": 0.8142857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18571428571428572 }, { "epoch": 0.8691947166595654, "grad_norm": 85.80021342783449, "learning_rate": 3.9226810139992115e-07, "loss": 0.3224, "step": 4080, "success_rate.epoch.env.abd": 0.9861878453038674, "success_rate.epoch.env.agentgym:alfworld": 0.8821292775665399, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.8961794019933554, "success_rate.epoch.env.math": 0.959412780656304, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7944042753850991, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8675074171893459, "success_rate.epoch.global": 0.88, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998291015625, "tokens_p.mean_in_band": 0.4763327205882353, "tokens_rate.above_band": 0.978343949044586, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02165605095541401 }, { "epoch": 0.8702599062633148, "grad_norm": 190.94003553549646, "learning_rate": 3.9224790976993063e-07, "loss": 0.4686, "step": 4085, "success_rate.epoch.env.abd": 0.9861878453038674, "success_rate.epoch.env.agentgym:alfworld": 0.8825757575757576, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.896351575456053, "success_rate.epoch.env.math": 0.9594652867615352, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7939698492462312, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.867540795087502, "success_rate.epoch.global": 0.8799019607843137, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973908918406073, "tokens_p.mean_in_band": 0.53359375, "tokens_rate.above_band": 0.9723247232472325, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027675276752767528 }, { "epoch": 0.8713250958670643, "grad_norm": 164.87786367897124, "learning_rate": 3.922276946378952e-07, "loss": 0.2685, "step": 4090, "success_rate.epoch.env.abd": 0.9861878453038674, "success_rate.epoch.env.agentgym:alfworld": 0.8825757575757576, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9517426273458445, "success_rate.epoch.env.logic": 0.896351575456053, "success_rate.epoch.env.math": 0.9595350839431769, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.794228356336261, "success_rate.epoch.env.webshop": 0.9782608695652174, "success_rate.epoch.env_macro_mean": 0.8676263514674247, "success_rate.epoch.global": 0.8800489596083231, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975837628865979, "tokens_p.mean_in_band": 0.8454241071428571, "tokens_rate.above_band": 0.9910600255427842, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008939974457215836 }, { "epoch": 0.8723902854708138, "grad_norm": 90.08712578404828, "learning_rate": 3.922074560185474e-07, "loss": 0.2735, "step": 4095, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8825757575757576, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.952, "success_rate.epoch.env.logic": 0.896351575456053, "success_rate.epoch.env.math": 0.959552495697074, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7944218113444061, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8677212877097381, "success_rate.epoch.global": 0.8801955990220048, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9999055177626606, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9992447129909365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0007552870090634441 }, { "epoch": 0.8734554750745632, "grad_norm": 158.03905301086172, "learning_rate": 3.921871939266372e-07, "loss": 0.4088, "step": 4100, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8825757575757576, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.8956089478044739, "success_rate.epoch.env.math": 0.959604641168887, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7934918648310388, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8675855814262231, "success_rate.epoch.global": 0.8797313797313797, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9980680868838764, "tokens_p.mean_below_band": 1.8189894035458565e-09, "tokens_p.mean_in_band": 0.49518229166666666, "tokens_rate.above_band": 0.9747557003257329, "tokens_rate.below_band": 0.0008143322475570033, "tokens_rate.in_band": 0.024429967426710098 }, { "epoch": 0.8745206646783127, "grad_norm": 111.59912226078207, "learning_rate": 3.9216690837693136e-07, "loss": 0.3482, "step": 4105, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8830188679245283, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.8950413223140495, "success_rate.epoch.env.math": 0.9596739596739596, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7935564591804817, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8675898383047734, "success_rate.epoch.global": 0.8797560975609756, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971556886227545, "tokens_p.mean_in_band": 0.5600328947368421, "tokens_rate.above_band": 0.977751756440281, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02224824355971897 }, { "epoch": 0.8755858542820623, "grad_norm": 90.48979363370822, "learning_rate": 3.921465993842138e-07, "loss": 0.4444, "step": 4110, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8830188679245283, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.8952145214521452, "success_rate.epoch.env.math": 0.9597602739726028, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.793125, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.867574206873524, "success_rate.epoch.global": 0.8796589524969549, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9912790697674418, "tokens_p.mean_in_band": 0.406982421875, "tokens_rate.above_band": 0.8431372549019608, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1568627450980392 }, { "epoch": 0.8766510438858117, "grad_norm": 92.5598991521915, "learning_rate": 3.9212626696328564e-07, "loss": 0.3172, "step": 4115, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8830188679245283, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.8938271604938272, "success_rate.epoch.env.math": 0.9598118854211202, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7933832709113608, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8674762542736659, "success_rate.epoch.global": 0.8795620437956204, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997539592760181, "tokens_p.mean_in_band": 0.5785590277777778, "tokens_rate.above_band": 0.9608695652173913, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0391304347826087 }, { "epoch": 0.8777162334895612, "grad_norm": 320.6005692861649, "learning_rate": 3.9210591112896503e-07, "loss": 0.2694, "step": 4120, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.8940016433853739, "success_rate.epoch.env.math": 0.9598804950917627, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7934477379095164, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8676037871658405, "success_rate.epoch.global": 0.8797083839611178, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972596153846154, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9984639016897081, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0015360983102918587 }, { "epoch": 0.8787814230933106, "grad_norm": 226.12339653147313, "learning_rate": 3.9208553189608706e-07, "loss": 0.283, "step": 4125, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.8942622950819672, "success_rate.epoch.env.math": 0.9598976109215017, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7932108377452507, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8676075023805737, "success_rate.epoch.global": 0.8796116504854369, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9880257009345794, "tokens_p.mean_in_band": 0.3138020833333333, "tokens_rate.above_band": 0.9224137931034483, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07758620689655173 }, { "epoch": 0.8798466126970601, "grad_norm": 168.1893708708438, "learning_rate": 3.920651292795041e-07, "loss": 0.3633, "step": 4130, "success_rate.epoch.env.abd": 0.9864130434782609, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.8943488943488943, "success_rate.epoch.env.math": 0.9599829714772243, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7933395580454404, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8676415865430186, "success_rate.epoch.global": 0.8797575757575757, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978243670886076, "tokens_p.mean_in_band": 0.734375, "tokens_rate.above_band": 0.9753086419753086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024691358024691357 }, { "epoch": 0.8809118023008096, "grad_norm": 115.04204821439724, "learning_rate": 3.9204470329408526e-07, "loss": 0.4687, "step": 4135, "success_rate.epoch.env.abd": 0.9864130434782609, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.8943488943488943, "success_rate.epoch.env.math": 0.96, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7929857231533209, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8676109677821692, "success_rate.epoch.global": 0.8795399515738499, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9892578125, "tokens_p.mean_in_band": 0.5427631578947368, "tokens_rate.above_band": 0.8347826086956521, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16521739130434782 }, { "epoch": 0.881976991904559, "grad_norm": 233.01581167450817, "learning_rate": 3.9202425395471694e-07, "loss": 0.3323, "step": 4140, "success_rate.epoch.env.abd": 0.986449864498645, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.8944353518821604, "success_rate.epoch.env.math": 0.9600340136054422, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7930607187112764, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8676320848473552, "success_rate.epoch.global": 0.8795646916565901, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9897629310344828, "tokens_p.mean_in_band": 0.678515625, "tokens_rate.above_band": 0.9206349206349206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07936507936507936 }, { "epoch": 0.8830421815083085, "grad_norm": 121.88659449962579, "learning_rate": 3.920037812763025e-07, "loss": 0.3967, "step": 4145, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.9731800766283525, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8946078431372549, "success_rate.epoch.env.math": 0.9600679694137638, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7933168316831684, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8676889478013816, "success_rate.epoch.global": 0.8797101449275362, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994739057239057, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9983193277310924, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016806722689075631 }, { "epoch": 0.884107371112058, "grad_norm": 98.22379595810378, "learning_rate": 3.9198328527376226e-07, "loss": 0.1935, "step": 4150, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.9732824427480916, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8946078431372549, "success_rate.epoch.env.math": 0.9601357082273113, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7932632880098888, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8677109664545746, "success_rate.epoch.global": 0.8797346200241255, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994645305003427, "tokens_p.mean_in_band": 0.6650390625, "tokens_rate.above_band": 0.9972658920027341, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002734107997265892 }, { "epoch": 0.8851725607158074, "grad_norm": 99.45895646299643, "learning_rate": 3.9196276596203355e-07, "loss": 0.3137, "step": 4155, "success_rate.epoch.env.abd": 0.9865591397849462, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8946078431372549, "success_rate.epoch.env.math": 0.96015260703688, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7930289944478717, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8677070433833173, "success_rate.epoch.global": 0.8796385542168674, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979452054794521, "tokens_p.mean_in_band": 0.5353422619047619, "tokens_rate.above_band": 0.9455958549222798, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054404145077720206 }, { "epoch": 0.8862377503195569, "grad_norm": 358.70068808547074, "learning_rate": 3.9194222335607065e-07, "loss": 0.4291, "step": 4160, "success_rate.epoch.env.abd": 0.9865591397849462, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8939641109298532, "success_rate.epoch.env.math": 0.9602368866328257, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7932203389830509, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8676735790127467, "success_rate.epoch.global": 0.8796630565583634, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9920804794520548, "tokens_p.mean_in_band": 0.8014322916666666, "tokens_rate.above_band": 0.8902439024390244, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10975609756097561 }, { "epoch": 0.8873029399233063, "grad_norm": 56.52206180161196, "learning_rate": 3.919216574708449e-07, "loss": 0.4367, "step": 4165, "success_rate.epoch.env.abd": 0.9865951742627346, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8942229454841334, "success_rate.epoch.env.math": 0.9603040540540541, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7933477055743764, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8677180701986213, "success_rate.epoch.global": 0.8798076923076923, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9941123188405797, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.971830985915493, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028169014084507043 }, { "epoch": 0.8883681295270558, "grad_norm": 359.63707897897535, "learning_rate": 3.919010683213447e-07, "loss": 0.2945, "step": 4170, "success_rate.epoch.env.abd": 0.9866310160427807, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8944805194805194, "success_rate.epoch.env.math": 0.9598986914309835, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7933579335793358, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.867708823031196, "success_rate.epoch.global": 0.8797118847539016, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9921052631578947, "tokens_p.mean_in_band": 0.6541466346153846, "tokens_rate.above_band": 0.8796296296296297, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12037037037037036 }, { "epoch": 0.8894333191308053, "grad_norm": 114.01964139845309, "learning_rate": 3.9188045592257505e-07, "loss": 0.3021, "step": 4175, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8834586466165414, "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.8944805194805194, "success_rate.epoch.env.math": 0.9595278246205734, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7933681301811483, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8676906378545223, "success_rate.epoch.global": 0.8796163069544365, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_in_band": 0.6779119318181818, "tokens_rate.above_band": 0.9502262443438914, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049773755656108594 }, { "epoch": 0.8904985087345547, "grad_norm": 230.81344888148251, "learning_rate": 3.918598202895582e-07, "loss": 0.345, "step": 4180, "success_rate.epoch.env.abd": 0.9867021276595744, "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, "success_rate.epoch.env.agentgym:sciworld": 0.973384030418251, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.8944805194805194, "success_rate.epoch.env.math": 0.9595789473684211, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7936848559166155, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8677669827586881, "success_rate.epoch.global": 0.8797604790419161, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974688473520249, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9968944099378882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003105590062111801 }, { "epoch": 0.8915636983383042, "grad_norm": 145.8650421568688, "learning_rate": 3.9183916143733335e-07, "loss": 0.2868, "step": 4185, "success_rate.epoch.env.abd": 0.9867021276595744, "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.8944805194805194, "success_rate.epoch.env.math": 0.9596299411269975, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7933884297520661, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8677651384155124, "success_rate.epoch.global": 0.8796650717703349, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997101814516129, "tokens_p.mean_in_band": 0.5989583333333334, "tokens_rate.above_band": 0.9323308270676691, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06766917293233082 }, { "epoch": 0.8926288879420536, "grad_norm": 308.1724686277715, "learning_rate": 3.918184793809564e-07, "loss": 0.2782, "step": 4190, "success_rate.epoch.env.abd": 0.9867021276595744, "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.8937550689375506, "success_rate.epoch.env.math": 0.9596808063838723, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.793398533007335, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8677450275410145, "success_rate.epoch.global": 0.8795698924731182, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979619565217391, "tokens_p.mean_in_band": 0.5003255208333334, "tokens_rate.above_band": 0.968421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031578947368421054 }, { "epoch": 0.8936940775458031, "grad_norm": 69.32629351418618, "learning_rate": 3.917977741355004e-07, "loss": 0.4975, "step": 4195, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9528795811518325, "success_rate.epoch.env.logic": 0.8939271255060729, "success_rate.epoch.env.math": 0.9596977329974811, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.793587786259542, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8678002250207946, "success_rate.epoch.global": 0.8797136038186157, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987980769230769, "tokens_p.mean_in_band": 0.6298828125, "tokens_rate.above_band": 0.9701492537313433, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029850746268656716 }, { "epoch": 0.8947592671495527, "grad_norm": 41.96870777450887, "learning_rate": 3.9177704571605503e-07, "loss": 0.1808, "step": 4200, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8838951310861424, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9528795811518325, "success_rate.epoch.env.logic": 0.8940129449838188, "success_rate.epoch.env.math": 0.959748427672956, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7939652544955806, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8678469506925454, "success_rate.epoch.global": 0.8798569725864124, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.991688829787234, "tokens_p.mean_in_band": 0.857421875, "tokens_rate.above_band": 0.94, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06 }, { "epoch": 0.8958244567533021, "grad_norm": 43.17995073444214, "learning_rate": 3.917562941377272e-07, "loss": 0.2579, "step": 4205, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8843283582089553, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9530026109660574, "success_rate.epoch.env.logic": 0.8940986257073565, "success_rate.epoch.env.math": 0.9598326359832636, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7940907706366128, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8679243745209013, "success_rate.epoch.global": 0.88, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994019138755981, "tokens_p.mean_in_band": 0.8352864583333334, "tokens_rate.above_band": 0.9928741092636579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007125890736342043 }, { "epoch": 0.8968896463570516, "grad_norm": 56.92204749227752, "learning_rate": 3.9173551941564027e-07, "loss": 0.2152, "step": 4210, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8847583643122676, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8940986257073565, "success_rate.epoch.env.math": 0.9598494353826851, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.794224924012158, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8679883152401942, "success_rate.epoch.global": 0.8800237812128419, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967813670411985, "tokens_p.mean_in_band": 0.548828125, "tokens_rate.above_band": 0.9888888888888889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011111111111111112 }, { "epoch": 0.8979548359608011, "grad_norm": 150.98657235749974, "learning_rate": 3.9171472156493495e-07, "loss": 0.1373, "step": 4215, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8851851851851852, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8940986257073565, "success_rate.epoch.env.math": 0.9594820384294068, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.7942961165048543, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8680001894594974, "success_rate.epoch.global": 0.8799287410926366, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997718253968254, "tokens_p.mean_in_band": 0.46337890625, "tokens_rate.above_band": 0.9752321981424149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02476780185758514 }, { "epoch": 0.8990200255645505, "grad_norm": 34.085881201807204, "learning_rate": 3.9169390060076844e-07, "loss": 0.289, "step": 4220, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8856088560885609, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8940986257073565, "success_rate.epoch.env.math": 0.9595664860358483, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.7944831767202183, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8677665415142912, "success_rate.epoch.global": 0.8799525504151838, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982049608355091, "tokens_p.mean_in_band": 0.7213541666666666, "tokens_rate.above_band": 0.9551122194513716, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04488778054862843 }, { "epoch": 0.9000852151683, "grad_norm": 204.394889676229, "learning_rate": 3.9167305653831494e-07, "loss": 0.373, "step": 4225, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8856088560885609, "success_rate.epoch.env.agentgym:sciworld": 0.9734848484848485, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9506493506493506, "success_rate.epoch.env.logic": 0.8941841680129241, "success_rate.epoch.env.math": 0.9596001665972511, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.7946698970321018, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8676478812468633, "success_rate.epoch.global": 0.8799763033175355, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.98820816008316, "tokens_p.mean_in_band": 0.5863589638157894, "tokens_rate.above_band": 0.8350694444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16493055555555555 }, { "epoch": 0.9011504047720494, "grad_norm": 104.2274901005957, "learning_rate": 3.916521893927654e-07, "loss": 0.4514, "step": 4230, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9735849056603774, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9506493506493506, "success_rate.epoch.env.logic": 0.8943548387096775, "success_rate.epoch.env.math": 0.9596337910944652, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.794313369630974, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.867347145797434, "success_rate.epoch.global": 0.8797633136094675, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973611111111111, "tokens_p.mean_in_band": 0.4739583333333333, "tokens_rate.above_band": 0.9375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0625 }, { "epoch": 0.9022155943757989, "grad_norm": 340.6410492277857, "learning_rate": 3.916312991793277e-07, "loss": 0.4968, "step": 4235, "success_rate.epoch.env.abd": 0.9868766404199475, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9738805970149254, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9506493506493506, "success_rate.epoch.env.logic": 0.894524959742351, "success_rate.epoch.env.math": 0.9596505823627288, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7944377267230955, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8671202586158372, "success_rate.epoch.global": 0.8797872340425532, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979596219931272, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9603960396039604, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039603960396039604 }, { "epoch": 0.9032807839795484, "grad_norm": 29.060378578750147, "learning_rate": 3.916103859132265e-07, "loss": 0.2905, "step": 4240, "success_rate.epoch.env.abd": 0.9868766404199475, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9738805970149254, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9507772020725389, "success_rate.epoch.env.logic": 0.8946945337620579, "success_rate.epoch.env.math": 0.959717607973422, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7946239806704923, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8671703227068359, "success_rate.epoch.global": 0.8799291617473436, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970414201183432, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9883040935672515, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011695906432748537 }, { "epoch": 0.9043459735832978, "grad_norm": 0.0, "learning_rate": 3.915894496097032e-07, "loss": 0.286, "step": 4245, "success_rate.epoch.env.abd": 0.9869109947643979, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9739776951672863, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9483204134366925, "success_rate.epoch.env.logic": 0.8948635634028892, "success_rate.epoch.env.math": 0.9597343295973433, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7948717948717948, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8669983435547468, "success_rate.epoch.global": 0.8799528301886792, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_p.mean_below_band": 1.2014061212539673e-07, "tokens_p.mean_in_band": 0.5553546167695473, "tokens_rate.above_band": 0.8762677484787018, "tokens_rate.below_band": 0.0005070993914807302, "tokens_rate.in_band": 0.12322515212981744 }, { "epoch": 0.9054111631870473, "grad_norm": 184.09510660925153, "learning_rate": 3.9156849028401606e-07, "loss": 0.696, "step": 4250, "success_rate.epoch.env.abd": 0.9869451697127938, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9739776951672863, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9484536082474226, "success_rate.epoch.env.logic": 0.8949478748997595, "success_rate.epoch.env.math": 0.9598177299088649, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7946940006029545, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.867012642400081, "success_rate.epoch.global": 0.8799764428739694, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975316210635002, "tokens_p.mean_in_band": 0.7434895833333334, "tokens_rate.above_band": 0.9969119917653114, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003088008234688626 }, { "epoch": 0.9064763527907967, "grad_norm": 30.279198121952643, "learning_rate": 3.9154750795144e-07, "loss": 0.2131, "step": 4255, "success_rate.epoch.env.abd": 0.9869451697127938, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9739776951672863, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9485861182519281, "success_rate.epoch.env.logic": 0.8950320512820513, "success_rate.epoch.env.math": 0.9598676044683492, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7947019867549668, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8670376012271986, "success_rate.epoch.global": 0.88, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981664540816326, "tokens_p.mean_in_band": 0.7180397727272727, "tokens_rate.above_band": 0.9727047146401985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02729528535980149 }, { "epoch": 0.9075415423945462, "grad_norm": 174.59313184097746, "learning_rate": 3.9152650262726684e-07, "loss": 0.4507, "step": 4260, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9739776951672863, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9485861182519281, "success_rate.epoch.env.logic": 0.8952, "success_rate.epoch.env.math": 0.9595208591491119, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7948255114320096, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8670387417159211, "success_rate.epoch.global": 0.8800235017626322, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9932725694444444, "tokens_p.mean_in_band": 0.6432291666666666, "tokens_rate.above_band": 0.96, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04 }, { "epoch": 0.9086067319982957, "grad_norm": 40.598275032733184, "learning_rate": 3.915054743268052e-07, "loss": 0.2617, "step": 4265, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8827838827838828, "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9485861182519281, "success_rate.epoch.env.logic": 0.8952837729816147, "success_rate.epoch.env.math": 0.9595375722543352, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7950105199879771, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8671472157805767, "success_rate.epoch.global": 0.8801643192488263, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994071815718157, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.9096719216020451, "grad_norm": 242.4380537843047, "learning_rate": 3.914844230653802e-07, "loss": 0.2221, "step": 4270, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8827838827838828, "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.948849104859335, "success_rate.epoch.env.logic": 0.8952837729816147, "success_rate.epoch.env.math": 0.9591752577319588, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7953181272509003, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8671661502667544, "success_rate.epoch.global": 0.8801875732708089, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975016937669376, "tokens_p.mean_in_band": 0.72998046875, "tokens_rate.above_band": 0.9892761394101877, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010723860589812333 }, { "epoch": 0.9107371112057946, "grad_norm": 669.6763207594614, "learning_rate": 3.9146334885833396e-07, "loss": 0.3902, "step": 4275, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.948849104859335, "success_rate.epoch.env.logic": 0.8954509177972865, "success_rate.epoch.env.math": 0.9591920857378401, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7953251423434222, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672663207894588, "success_rate.epoch.global": 0.8802107728337236, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972956730769231, "tokens_p.mean_in_band": 0.5191761363636364, "tokens_rate.above_band": 0.9497716894977168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0502283105022831 }, { "epoch": 0.911802300809544, "grad_norm": 240.13102506303147, "learning_rate": 3.914422517210251e-07, "loss": 0.3916, "step": 4280, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.948849104859335, "success_rate.epoch.env.logic": 0.8947368421052632, "success_rate.epoch.env.math": 0.9592760180995475, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7955701885662975, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672313119615097, "success_rate.epoch.global": 0.8802339181286549, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994140625, "tokens_p.mean_in_band": 0.7431640625, "tokens_rate.above_band": 0.9230769230769231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07692307692307693 }, { "epoch": 0.9128674904132935, "grad_norm": 84.75150776611382, "learning_rate": 3.9142113166882925e-07, "loss": 0.354, "step": 4285, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, "success_rate.epoch.env.agentgym:sciworld": 0.9740740740740741, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.8948207171314742, "success_rate.epoch.env.math": 0.9593596059113301, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7954545454545454, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672478852982043, "success_rate.epoch.global": 0.8802570093457944, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971498371335505, "tokens_p.mean_in_band": 0.6361607142857143, "tokens_rate.above_band": 0.956386292834891, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04361370716510903 }, { "epoch": 0.9139326800170431, "grad_norm": 156.27392671996975, "learning_rate": 3.9139998871713836e-07, "loss": 0.3656, "step": 4290, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8832116788321168, "success_rate.epoch.env.agentgym:sciworld": 0.974169741697417, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.8949044585987261, "success_rate.epoch.env.math": 0.9593762823143209, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7952849895553566, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672502970795133, "success_rate.epoch.global": 0.8801633605600934, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9907142857142858, "tokens_p.mean_in_band": 0.651611328125, "tokens_rate.above_band": 0.9162303664921466, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08376963350785341 }, { "epoch": 0.9149978696207925, "grad_norm": 63.44082478644645, "learning_rate": 3.913788228813614e-07, "loss": 0.2266, "step": 4295, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, "success_rate.epoch.env.agentgym:sciworld": 0.974169741697417, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.8951548848292296, "success_rate.epoch.env.math": 0.9594428512904547, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7951088577393379, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8673017105517736, "success_rate.epoch.global": 0.8801864801864802, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965029761904762, "tokens_p.mean_in_band": 0.6805555555555556, "tokens_rate.above_band": 0.9790209790209791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02097902097902098 }, { "epoch": 0.916063059224542, "grad_norm": 111.16298476786717, "learning_rate": 3.913576341769238e-07, "loss": 0.3859, "step": 4300, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, "success_rate.epoch.env.agentgym:sciworld": 0.974169741697417, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.8953211736716892, "success_rate.epoch.env.math": 0.95949263502455, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7954139368671829, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8673490879794463, "success_rate.epoch.global": 0.880325960419092, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9920833333333333, "tokens_p.mean_in_band": 0.7415364583333334, "tokens_rate.above_band": 0.9259259259259259, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07407407407407407 }, { "epoch": 0.9171282488282915, "grad_norm": 349.41621481729527, "learning_rate": 3.9133642261926775e-07, "loss": 0.4682, "step": 4305, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.8953211736716892, "success_rate.epoch.env.math": 0.9595753368721928, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7955357142857142, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8673879071276658, "success_rate.epoch.global": 0.8804651162790698, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979987684729064, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.9181934384320409, "grad_norm": 404.63530297591666, "learning_rate": 3.91315188223852e-07, "loss": 0.4303, "step": 4310, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9465648854961832, "success_rate.epoch.env.logic": 0.894695170229612, "success_rate.epoch.env.math": 0.9596083231334149, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7954815695600476, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8671516040770321, "success_rate.epoch.global": 0.8802555168408827, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9928125, "tokens_p.mean_in_band": 0.6816625702247191, "tokens_rate.above_band": 0.910010111223458, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08998988877654196 }, { "epoch": 0.9192586280357904, "grad_norm": 70.38997713588995, "learning_rate": 3.9129393100615224e-07, "loss": 0.526, "step": 4315, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9467005076142132, "success_rate.epoch.env.logic": 0.8948616600790514, "success_rate.epoch.env.math": 0.9596412556053812, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7957850994360345, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.867209656287525, "success_rate.epoch.global": 0.8803944315545243, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972826086956522, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.9928057553956835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007194244604316547 }, { "epoch": 0.9203238176395399, "grad_norm": 281.8248787932557, "learning_rate": 3.912726509816604e-07, "loss": 0.3975, "step": 4320, "success_rate.epoch.env.abd": 0.987146529562982, "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9467005076142132, "success_rate.epoch.env.logic": 0.8951104100946372, "success_rate.epoch.env.math": 0.959674134419552, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7954343314556774, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672063825012092, "success_rate.epoch.global": 0.8803012746234067, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939088983050848, "tokens_p.mean_in_band": 0.471484375, "tokens_rate.above_band": 0.921875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.078125 }, { "epoch": 0.9213890072432893, "grad_norm": 192.9832077656174, "learning_rate": 3.9125134816588524e-07, "loss": 0.2983, "step": 4325, "success_rate.epoch.env.abd": 0.9872122762148338, "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, "success_rate.epoch.env.agentgym:sciworld": 0.9745454545454545, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9467005076142132, "success_rate.epoch.env.logic": 0.8952755905511811, "success_rate.epoch.env.math": 0.959674134419552, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7956766360675156, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672663563109105, "success_rate.epoch.global": 0.8804398148148148, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992647058823529, "tokens_p.mean_in_band": 0.7731370192307693, "tokens_rate.above_band": 0.9786184210526315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02138157894736842 }, { "epoch": 0.9224541968470388, "grad_norm": 103.33526123252042, "learning_rate": 3.912300225743521e-07, "loss": 0.252, "step": 4330, "success_rate.epoch.env.abd": 0.9872122762148338, "success_rate.epoch.env.agentgym:alfworld": 0.8836363636363637, "success_rate.epoch.env.agentgym:sciworld": 0.9745454545454545, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9467005076142132, "success_rate.epoch.env.logic": 0.8953579858379229, "success_rate.epoch.env.math": 0.9597397315982107, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7956830277942046, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672803912374643, "success_rate.epoch.global": 0.8804624277456647, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9922680412371134, "tokens_p.mean_in_band": 0.5939670138888888, "tokens_rate.above_band": 0.8434782608695652, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1565217391304348 }, { "epoch": 0.9235193864507882, "grad_norm": 61.21320026286401, "learning_rate": 3.91208674222603e-07, "loss": 0.4061, "step": 4335, "success_rate.epoch.env.abd": 0.9872122762148338, "success_rate.epoch.env.agentgym:alfworld": 0.8840579710144928, "success_rate.epoch.env.agentgym:sciworld": 0.9745454545454545, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9468354430379747, "success_rate.epoch.env.logic": 0.8954402515723271, "success_rate.epoch.env.math": 0.9597724502234863, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.7956894006495424, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8673420185117284, "success_rate.epoch.global": 0.8804849884526559, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980837264150944, "tokens_p.mean_in_band": 0.62255859375, "tokens_rate.above_band": 0.9906542056074766, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009345794392523364 }, { "epoch": 0.9245845760545377, "grad_norm": 48.43378313483629, "learning_rate": 3.911873031261963e-07, "loss": 0.4339, "step": 4340, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8840579710144928, "success_rate.epoch.env.agentgym:sciworld": 0.9745454545454545, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9468354430379747, "success_rate.epoch.env.logic": 0.8955223880597015, "success_rate.epoch.env.math": 0.9593826157595451, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7954009433962265, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8670195031474527, "success_rate.epoch.global": 0.880161476355248, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.52, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9958333333333333, "tokens_p.mean_in_band": 0.5508928571428572, "tokens_rate.above_band": 0.825, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.175 }, { "epoch": 0.9256497656582872, "grad_norm": 127.43720098759341, "learning_rate": 3.911659093007073e-07, "loss": 0.3267, "step": 4345, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8844765342960289, "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9468354430379747, "success_rate.epoch.env.logic": 0.8956862745098039, "success_rate.epoch.env.math": 0.9593991067803491, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7954077126876656, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8670829519345297, "success_rate.epoch.global": 0.880184331797235, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971931137724551, "tokens_p.mean_in_band": 0.47794117647058826, "tokens_rate.above_band": 0.9515669515669516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04843304843304843 }, { "epoch": 0.9267149552620366, "grad_norm": 48.717733298436826, "learning_rate": 3.9114449276172745e-07, "loss": 0.3, "step": 4350, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8853046594982079, "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.946969696969697, "success_rate.epoch.env.logic": 0.8956862745098039, "success_rate.epoch.env.math": 0.9594155844155844, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7954745812518366, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8671780178739212, "success_rate.epoch.global": 0.8802071346375144, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995101880877743, "tokens_p.mean_in_band": 0.5290178571428571, "tokens_rate.above_band": 0.9891472868217054, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010852713178294573 }, { "epoch": 0.9277801448657861, "grad_norm": 108.72267923029256, "learning_rate": 3.911230535248652e-07, "loss": 0.4403, "step": 4355, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8853046594982079, "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.946969696969697, "success_rate.epoch.env.logic": 0.8959311424100157, "success_rate.epoch.env.math": 0.9594977723774808, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7955346650998825, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672535089758759, "success_rate.epoch.global": 0.8803448275862069, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966755319148937, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9791666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020833333333333332 }, { "epoch": 0.9288453344695355, "grad_norm": 101.6346237726961, "learning_rate": 3.9110159160574513e-07, "loss": 0.3878, "step": 4360, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8861209964412812, "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.946969696969697, "success_rate.epoch.env.logic": 0.8959311424100157, "success_rate.epoch.env.math": 0.959546925566343, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.795834555588149, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8673594526686216, "success_rate.epoch.global": 0.8804822043628013, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975269784172662, "tokens_p.mean_in_band": 0.765625, "tokens_rate.above_band": 0.9985632183908046, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0014367816091954023 }, { "epoch": 0.929910524073285, "grad_norm": 82.60250224563595, "learning_rate": 3.9108010702000866e-07, "loss": 0.404, "step": 4365, "success_rate.epoch.env.abd": 0.9873096446700508, "success_rate.epoch.env.agentgym:alfworld": 0.8861209964412812, "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.947103274559194, "success_rate.epoch.env.logic": 0.89609375, "success_rate.epoch.env.math": 0.9596122778675282, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7956610964526531, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.867379486243681, "success_rate.epoch.global": 0.8805045871559632, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982199367088608, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0125 }, { "epoch": 0.9309757136770345, "grad_norm": 32.798710622569736, "learning_rate": 3.910585997833135e-07, "loss": 0.2729, "step": 4370, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, "success_rate.epoch.env.agentgym:sciworld": 0.9746376811594203, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9472361809045227, "success_rate.epoch.env.logic": 0.8962558502340093, "success_rate.epoch.env.math": 0.959628582963262, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7956076134699853, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8674425573734996, "success_rate.epoch.global": 0.8805269186712485, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995300751879699, "tokens_p.mean_in_band": 0.521875, "tokens_rate.above_band": 0.9851851851851852, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014814814814814815 }, { "epoch": 0.9320409032807839, "grad_norm": 74.0998821619742, "learning_rate": 3.9103706991133397e-07, "loss": 0.261, "step": 4375, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8865248226950354, "success_rate.epoch.env.agentgym:sciworld": 0.9711191335740073, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9472361809045227, "success_rate.epoch.env.logic": 0.8964174454828661, "success_rate.epoch.env.math": 0.9596774193548387, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7957870099473376, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.867158128330988, "success_rate.epoch.global": 0.8805355303810505, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9956293706293706, "tokens_p.mean_in_band": 0.62890625, "tokens_rate.above_band": 0.9862068965517241, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013793103448275862 }, { "epoch": 0.9331060928845335, "grad_norm": 207.2327266580442, "learning_rate": 3.91015517419761e-07, "loss": 0.3245, "step": 4380, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, "success_rate.epoch.env.agentgym:sciworld": 0.9711191335740073, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9472361809045227, "success_rate.epoch.env.logic": 0.8965785381026439, "success_rate.epoch.env.math": 0.9596936719064894, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7961448598130841, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672432344578194, "success_rate.epoch.global": 0.8806720768087781, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974464980544747, "tokens_p.mean_in_band": 0.716796875, "tokens_rate.above_band": 0.9922779922779923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007722007722007722 }, { "epoch": 0.934171282488283, "grad_norm": 127.374653904594, "learning_rate": 3.909939423243018e-07, "loss": 0.2777, "step": 4385, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9472361809045227, "success_rate.epoch.env.logic": 0.8967391304347826, "success_rate.epoch.env.math": 0.959323399113975, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7960910151691949, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8672562202072047, "success_rate.epoch.global": 0.8805799748829775, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961376404494382, "tokens_p.mean_in_band": 0.5571732954545454, "tokens_rate.above_band": 0.89, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11 }, { "epoch": 0.9352364720920324, "grad_norm": 187.94048768209956, "learning_rate": 3.9097234464068015e-07, "loss": 0.3394, "step": 4390, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8969790859798605, "success_rate.epoch.env.math": 0.9593888218737434, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7962099125364431, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8673068125544959, "success_rate.epoch.global": 0.8807161591971718, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989265267175572, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9924242424242424, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007575757575757576 }, { "epoch": 0.9363016616957819, "grad_norm": 216.5611589905703, "learning_rate": 3.909507243846363e-07, "loss": 0.1997, "step": 4395, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.897138437741686, "success_rate.epoch.env.math": 0.9594377510040161, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7965065502183406, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.867352714243041, "success_rate.epoch.global": 0.8808520332611914, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9928125, "tokens_p.mean_in_band": 0.8154296875, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 0.9373668512995313, "grad_norm": 131.85835247237105, "learning_rate": 3.9092908157192694e-07, "loss": 0.3984, "step": 4400, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8869257950530035, "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8972972972972973, "success_rate.epoch.env.math": 0.9590690208667737, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7968023255813953, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8673991755929911, "success_rate.epoch.global": 0.8808738195471613, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954861111111111, "tokens_p.mean_in_band": 0.283203125, "tokens_rate.above_band": 0.9782608695652174, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021739130434782608 }, { "epoch": 0.9384320409032808, "grad_norm": 232.27565672408662, "learning_rate": 3.9090741621832517e-07, "loss": 0.2866, "step": 4405, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, "success_rate.epoch.env.agentgym:sciworld": 0.9712230215827338, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8972972972972973, "success_rate.epoch.env.math": 0.9591509811774129, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7970383275261324, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8674642765806588, "success_rate.epoch.global": 0.8810092055915445, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995816256830601, "tokens_p.mean_in_band": 0.794921875, "tokens_rate.above_band": 0.9891891891891892, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010810810810810811 }, { "epoch": 0.9394972305070303, "grad_norm": 128.26299991951106, "learning_rate": 3.908857283396206e-07, "loss": 0.2535, "step": 4410, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.89745566692367, "success_rate.epoch.env.math": 0.9591836734693877, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7972149695387293, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8675128780927112, "success_rate.epoch.global": 0.8811442842547395, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966577540106952, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9842105263157894, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015789473684210527 }, { "epoch": 0.9405624201107797, "grad_norm": 72.79858714010084, "learning_rate": 3.908640179516192e-07, "loss": 0.2646, "step": 4415, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9476309226932669, "success_rate.epoch.env.logic": 0.8969230769230769, "success_rate.epoch.env.math": 0.9592, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7971602434077079, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8674848337326778, "success_rate.epoch.global": 0.8810522735003968, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994762569832403, "tokens_p.mean_in_band": 0.5894886363636364, "tokens_rate.above_band": 0.9701897018970189, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02981029810298103 }, { "epoch": 0.9416276097145292, "grad_norm": 172.1469900938378, "learning_rate": 3.908422850701432e-07, "loss": 0.234, "step": 4420, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8873239436619719, "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9476309226932669, "success_rate.epoch.env.logic": 0.897239263803681, "success_rate.epoch.env.math": 0.9592651757188498, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7972777295105705, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8675301836147067, "success_rate.epoch.global": 0.8811869973949484, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9918154761904762, "tokens_p.mean_in_band": 0.7373046875, "tokens_rate.above_band": 0.9545454545454546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045454545454545456 }, { "epoch": 0.9426927993182787, "grad_norm": 46.584986789611, "learning_rate": 3.9082052971103155e-07, "loss": 0.2454, "step": 4425, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.887719298245614, "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9476309226932669, "success_rate.epoch.env.logic": 0.8966309341500766, "success_rate.epoch.env.math": 0.9593139210211408, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7972230257448655, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8675102805662178, "success_rate.epoch.global": 0.8810951465097862, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986056430446194, "tokens_p.mean_in_band": 0.7485608552631579, "tokens_rate.above_band": 0.9525, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0475 }, { "epoch": 0.9437579889220281, "grad_norm": 133.07355614365466, "learning_rate": 3.907987518901393e-07, "loss": 0.2971, "step": 4430, "success_rate.epoch.env.abd": 0.9874371859296482, "success_rate.epoch.env.agentgym:alfworld": 0.887719298245614, "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9478908188585607, "success_rate.epoch.env.logic": 0.8967100229533282, "success_rate.epoch.env.math": 0.959378733572282, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7970511708586296, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8675342430208893, "success_rate.epoch.global": 0.8811165103401515, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985795454545454, "tokens_p.mean_in_band": 0.47574013157894735, "tokens_rate.above_band": 0.9418960244648318, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0581039755351682 }, { "epoch": 0.9448231785257776, "grad_norm": 85.39737039498038, "learning_rate": 3.90776951623338e-07, "loss": 0.1705, "step": 4435, "success_rate.epoch.env.abd": 0.9874371859296482, "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9478908188585607, "success_rate.epoch.env.logic": 0.8968678380443086, "success_rate.epoch.env.math": 0.9594272076372315, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7972855905284435, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8676099974474365, "success_rate.epoch.global": 0.8812507054972345, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948642061281338, "tokens_p.mean_in_band": 0.779296875, "tokens_rate.above_band": 0.9944598337950139, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00554016620498615 }, { "epoch": 0.945888368129527, "grad_norm": 180.09039020012654, "learning_rate": 3.907551289265156e-07, "loss": 0.7679, "step": 4440, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, "success_rate.epoch.env.agentgym:sciworld": 0.9713261648745519, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948019801980198, "success_rate.epoch.env.logic": 0.8968678380443086, "success_rate.epoch.env.math": 0.9594916600476568, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.797231035477358, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8676254852865752, "success_rate.epoch.global": 0.8812718457548765, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0003378378378378, "tokens_p.mean_in_band": 0.6696428571428571, "tokens_rate.above_band": 0.9906291834002677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009370816599732263 }, { "epoch": 0.9469535577332765, "grad_norm": 418.2889210377602, "learning_rate": 3.9073328381557616e-07, "loss": 0.5479, "step": 4445, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948019801980198, "success_rate.epoch.env.logic": 0.8971036585365854, "success_rate.epoch.env.math": 0.9595238095238096, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7971181556195965, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8676875458986402, "success_rate.epoch.global": 0.8812929383939633, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980730563002681, "tokens_p.mean_in_band": 0.53359375, "tokens_rate.above_band": 0.9867724867724867, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013227513227513227 }, { "epoch": 0.948018747337026, "grad_norm": 134.60925717271044, "learning_rate": 3.907114163064404e-07, "loss": 0.3851, "step": 4450, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8881118881118881, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9484029484029484, "success_rate.epoch.env.logic": 0.8964992389649924, "success_rate.epoch.env.math": 0.9595879556259905, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7971766061653702, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8676785753076505, "success_rate.epoch.global": 0.8813139835752053, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990942028985508, "tokens_p.mean_in_band": 0.6746651785714286, "tokens_rate.above_band": 0.9857142857142858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014285714285714285 }, { "epoch": 0.9490839369407754, "grad_norm": 171.1407566587678, "learning_rate": 3.906895264150451e-07, "loss": 0.3907, "step": 4455, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8885017421602788, "success_rate.epoch.env.agentgym:sciworld": 0.9715302491103203, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9484029484029484, "success_rate.epoch.env.logic": 0.8967350037965072, "success_rate.epoch.env.math": 0.9596039603960396, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7971223021582734, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8677412114280691, "success_rate.epoch.global": 0.8813349814585909, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980274086378738, "tokens_p.mean_in_band": 0.587890625, "tokens_rate.above_band": 0.9804560260586319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019543973941368076 }, { "epoch": 0.9501491265445249, "grad_norm": 120.35469660672833, "learning_rate": 3.906676141573435e-07, "loss": 0.3803, "step": 4460, "success_rate.epoch.env.abd": 0.9875311720698254, "success_rate.epoch.env.agentgym:alfworld": 0.8885017421602788, "success_rate.epoch.env.agentgym:sciworld": 0.9716312056737588, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9484029484029484, "success_rate.epoch.env.logic": 0.8967350037965072, "success_rate.epoch.env.math": 0.9596518987341772, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7973555619430871, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.867781634616889, "success_rate.epoch.global": 0.8814681782467167, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975868725868726, "tokens_p.mean_in_band": 0.8385416666666666, "tokens_rate.above_band": 0.9885496183206107, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011450381679389313 }, { "epoch": 0.9512143161482743, "grad_norm": 179.98681087678486, "learning_rate": 3.9064567954930506e-07, "loss": 0.1955, "step": 4465, "success_rate.epoch.env.abd": 0.9875311720698254, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9716312056737588, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9485294117647058, "success_rate.epoch.env.logic": 0.8968133535660091, "success_rate.epoch.env.math": 0.9597156398104265, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7975301550832855, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.867857115896554, "success_rate.epoch.global": 0.8816010763538513, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995535714285714, "tokens_p.mean_in_band": 0.708984375, "tokens_rate.above_band": 0.9955555555555555, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0044444444444444444 }, { "epoch": 0.9522795057520239, "grad_norm": 190.29942577057642, "learning_rate": 3.906237226069156e-07, "loss": 0.3317, "step": 4470, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.889273356401384, "success_rate.epoch.env.agentgym:sciworld": 0.9717314487632509, "success_rate.epoch.env.agentgym:textcraft": 0.9833333333333333, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9486552567237164, "success_rate.epoch.env.logic": 0.8968133535660091, "success_rate.epoch.env.math": 0.9597156398104265, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7977044476327116, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8679835320546453, "success_rate.epoch.global": 0.8817204301075269, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971496683250415, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9934102141680395, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006589785831960461 }, { "epoch": 0.9533446953557734, "grad_norm": 65.00760099139049, "learning_rate": 3.906017433461772e-07, "loss": 0.2975, "step": 4475, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.889273356401384, "success_rate.epoch.env.agentgym:sciworld": 0.9719298245614035, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948780487804878, "success_rate.epoch.env.logic": 0.8968915845337376, "success_rate.epoch.env.math": 0.9597633136094674, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7975336965873243, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8680337124768646, "success_rate.epoch.global": 0.8817408816289998, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986940298507463, "tokens_p.mean_in_band": 0.55859375, "tokens_rate.above_band": 0.9766763848396501, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023323615160349854 }, { "epoch": 0.9544098849595228, "grad_norm": 309.78135810933054, "learning_rate": 3.905797417831081e-07, "loss": 0.4408, "step": 4480, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.889273356401384, "success_rate.epoch.env.agentgym:sciworld": 0.972027972027972, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.896969696969697, "success_rate.epoch.env.math": 0.9598108747044918, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7977077363896848, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8681183166359002, "success_rate.epoch.global": 0.8818730442556996, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0003551136363635, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.9956483899042646, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004351610095735422 }, { "epoch": 0.9554750745632723, "grad_norm": 31.83015682785597, "learning_rate": 3.90557717933743e-07, "loss": 0.2196, "step": 4485, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.889273356401384, "success_rate.epoch.env.agentgym:sciworld": 0.9721254355400697, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.8970476911430735, "success_rate.epoch.env.math": 0.9599056603773585, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.797823596792669, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8681534169778389, "success_rate.epoch.global": 0.882004911810672, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946428571428572, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.9565402641670218, "grad_norm": 310.69052213980814, "learning_rate": 3.905356718141327e-07, "loss": 0.4136, "step": 4490, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8862068965517241, "success_rate.epoch.env.agentgym:sciworld": 0.972318339100346, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.8965256797583081, "success_rate.epoch.env.math": 0.959937156323645, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7979393245563824, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8678581129810982, "success_rate.epoch.global": 0.8819134701159679, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978971962616823, "tokens_p.mean_in_band": 0.45951021634615385, "tokens_rate.above_band": 0.9762773722627737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023722627737226276 }, { "epoch": 0.9576054537707712, "grad_norm": 70.41276784357738, "learning_rate": 3.905136034403441e-07, "loss": 0.26, "step": 4495, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.972318339100346, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.8967596081386586, "success_rate.epoch.env.math": 0.9599686028257457, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7981703830760435, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8679387925260724, "success_rate.epoch.global": 0.8820449988861662, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990150429799427, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.9586706433745207, "grad_norm": 192.62387585988628, "learning_rate": 3.9049151282846067e-07, "loss": 0.355, "step": 4500, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.972318339100346, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.8968373493975904, "success_rate.epoch.env.math": 0.9600470035252644, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7981153626499143, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8679479853926471, "success_rate.epoch.global": 0.8820649755229194, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938226744186046, "tokens_p.mean_in_band": 0.57109375, "tokens_rate.above_band": 0.945054945054945, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054945054945054944 }, { "epoch": 0.9597358329782701, "grad_norm": 212.75497605821852, "learning_rate": 3.904693999945818e-07, "loss": 0.2537, "step": 4505, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.972318339100346, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.8969924812030076, "success_rate.epoch.env.math": 0.960093896713615, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7978329056173368, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8679406733891191, "success_rate.epoch.global": 0.881973771949322, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9881578947368421, "tokens_p.mean_in_band": 0.561141304347826, "tokens_rate.above_band": 0.8050847457627118, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19491525423728814 }, { "epoch": 0.9608010225820196, "grad_norm": 63.52016339492654, "learning_rate": 3.9044726495482316e-07, "loss": 0.1731, "step": 4510, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.8970698722764838, "success_rate.epoch.env.math": 0.9601095033242081, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7979510529311327, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8679685460255705, "success_rate.epoch.global": 0.88199378330373, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_p.mean_in_band": 0.5911458333333334, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 0.9618662121857691, "grad_norm": 143.65631179993076, "learning_rate": 3.904251077253166e-07, "loss": 0.3271, "step": 4515, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9490291262135923, "success_rate.epoch.env.logic": 0.8972243060765192, "success_rate.epoch.env.math": 0.960171807887544, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7978390673869775, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8679893432568213, "success_rate.epoch.global": 0.8820137502772233, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972457627118644, "tokens_p.mean_in_band": 0.6216947115384616, "tokens_rate.above_band": 0.9577922077922078, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04220779220779221 }, { "epoch": 0.9629314017895185, "grad_norm": 46.06446517638285, "learning_rate": 3.904029283222102e-07, "loss": 0.2877, "step": 4520, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9491525423728814, "success_rate.epoch.env.logic": 0.8973013493253373, "success_rate.epoch.env.math": 0.9602028872415138, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7981833664490491, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8680416921499255, "success_rate.epoch.global": 0.8821444395214887, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977605863192183, "tokens_p.mean_in_band": 0.662109375, "tokens_rate.above_band": 0.9903225806451613, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00967741935483871 }, { "epoch": 0.963996591393268, "grad_norm": 103.31495680208405, "learning_rate": 3.9038072676166814e-07, "loss": 0.1764, "step": 4525, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9491525423728814, "success_rate.epoch.env.logic": 0.897378277153558, "success_rate.epoch.env.math": 0.9602958349552355, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7983550765740216, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8680727453923726, "success_rate.epoch.global": 0.8822748395662757, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9926470588235294, "tokens_p.mean_in_band": 0.840625, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 0.9650617809970174, "grad_norm": 122.40428764586784, "learning_rate": 3.903585030598707e-07, "loss": 0.4268, "step": 4530, "success_rate.epoch.env.abd": 0.9875930521091811, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9724137931034482, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.8974550898203593, "success_rate.epoch.env.math": 0.9603112840466926, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.79841449603624, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8681005057760899, "success_rate.epoch.global": 0.8822944297082228, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969347133757962, "tokens_p.mean_in_band": 0.3919270833333333, "tokens_rate.above_band": 0.9849435382685069, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015056461731493099 }, { "epoch": 0.9661269706007669, "grad_norm": 162.64072871959314, "learning_rate": 3.903362572330144e-07, "loss": 0.1943, "step": 4535, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9725085910652921, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.8968609865470852, "success_rate.epoch.env.math": 0.9603421461897356, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7986425339366516, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8680814425941882, "success_rate.epoch.global": 0.8823139765952749, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959302325581395, "tokens_p.mean_in_band": 0.7574869791666666, "tokens_rate.above_band": 0.947136563876652, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05286343612334802 }, { "epoch": 0.9671921602045164, "grad_norm": 97.4979954005614, "learning_rate": 3.9031398929731187e-07, "loss": 0.3347, "step": 4540, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9725085910652921, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.8970149253731343, "success_rate.epoch.env.math": 0.9603883495145631, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.798079638520192, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8680484650245895, "success_rate.epoch.global": 0.8821129245699162, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.7999999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9964788732394366, "tokens_p.mean_below_band": 3.510081114654895e-12, "tokens_p.mean_in_band": 0.5606971153846154, "tokens_rate.above_band": 0.9102564102564102, "tokens_rate.below_band": 0.00641025641025641, "tokens_rate.in_band": 0.08333333333333333 }, { "epoch": 0.9682573498082658, "grad_norm": 88.78830832046516, "learning_rate": 3.9029169926899173e-07, "loss": 0.1327, "step": 4545, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.8865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9726962457337884, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.8971684053651267, "success_rate.epoch.env.math": 0.9604804339403332, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.798079638520192, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8680878485778859, "success_rate.epoch.global": 0.8822427847543511, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0008423180592991, "tokens_p.mean_in_band": 0.8212890625, "tokens_rate.above_band": 0.9946380697050938, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005361930294906166 }, { "epoch": 0.9693225394120153, "grad_norm": 71.49164727229137, "learning_rate": 3.9026938716429883e-07, "loss": 0.2396, "step": 4550, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.886986301369863, "success_rate.epoch.env.agentgym:sciworld": 0.9727891156462585, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.8965773809523809, "success_rate.epoch.env.math": 0.9605110336817654, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7982505643340858, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.867832301001599, "success_rate.epoch.global": 0.8821522887323944, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.000680126002291, "tokens_p.mean_in_band": 0.48931308962264153, "tokens_rate.above_band": 0.9427645788336934, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05723542116630669 }, { "epoch": 0.9703877290157648, "grad_norm": 340.58904556963864, "learning_rate": 3.9024705299949416e-07, "loss": 0.5943, "step": 4555, "success_rate.epoch.env.abd": 0.9876543209876543, "success_rate.epoch.env.agentgym:alfworld": 0.8877551020408163, "success_rate.epoch.env.agentgym:sciworld": 0.9727891156462585, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9493975903614458, "success_rate.epoch.env.logic": 0.8965773809523809, "success_rate.epoch.env.math": 0.9605720912253576, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7983643542019176, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8679319768867263, "success_rate.epoch.global": 0.8822818201802594, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968580163043478, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9986431478968792, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0013568521031207597 }, { "epoch": 0.9714529186195143, "grad_norm": 46.45539765360912, "learning_rate": 3.902246967908546e-07, "loss": 0.3957, "step": 4560, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, "success_rate.epoch.env.agentgym:sciworld": 0.9727891156462585, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9493975903614458, "success_rate.epoch.env.logic": 0.8966542750929368, "success_rate.epoch.env.math": 0.9606329602470088, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7982530290222598, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8676635684076338, "success_rate.epoch.global": 0.882191480017567, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955929487179487, "tokens_p.mean_in_band": 0.58203125, "tokens_rate.above_band": 0.9811320754716981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018867924528301886 }, { "epoch": 0.9725181082232638, "grad_norm": 251.31241027828054, "learning_rate": 3.902023185546732e-07, "loss": 0.526, "step": 4565, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, "success_rate.epoch.env.agentgym:sciworld": 0.9728813559322034, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9493975903614458, "success_rate.epoch.env.logic": 0.896807720861173, "success_rate.epoch.env.math": 0.9602776706517547, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7982554867754643, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.867653827881464, "success_rate.epoch.global": 0.8821013380127221, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983584630350194, "tokens_p.mean_in_band": 0.3706597222222222, "tokens_rate.above_band": 0.982791586998088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017208413001912046 }, { "epoch": 0.9735832978270132, "grad_norm": 19.46919745696725, "learning_rate": 3.9017991830725907e-07, "loss": 0.2465, "step": 4570, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9493975903614458, "success_rate.epoch.env.logic": 0.8962194217939214, "success_rate.epoch.env.math": 0.9603235747303543, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.797920179876335, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8675823656226446, "success_rate.epoch.global": 0.8819018404907976, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9970509105960265, "tokens_p.mean_below_band": 1.2759119272232056e-07, "tokens_p.mean_in_band": 0.6207853618421053, "tokens_rate.above_band": 0.9393468118195957, "tokens_rate.below_band": 0.0015552099533437014, "tokens_rate.in_band": 0.05909797822706065 }, { "epoch": 0.9746484874307627, "grad_norm": 264.0910856776589, "learning_rate": 3.901574960649373e-07, "loss": 0.2539, "step": 4575, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9495192307692307, "success_rate.epoch.env.logic": 0.8962962962962963, "success_rate.epoch.env.math": 0.9603846153846154, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7981471083660865, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8676265914457513, "success_rate.epoch.global": 0.8820310790107244, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972474093264249, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9974160206718347, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002583979328165375 }, { "epoch": 0.9757136770345122, "grad_norm": 67.71812632946968, "learning_rate": 3.9013505184404924e-07, "loss": 0.2042, "step": 4580, "success_rate.epoch.env.abd": 0.9877149877149877, "success_rate.epoch.env.agentgym:alfworld": 0.8847457627118644, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.8965262379896526, "success_rate.epoch.env.math": 0.9604150653343582, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7980364656381487, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8676539609518116, "success_rate.epoch.global": 0.8820507214691736, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967657342657342, "tokens_p.mean_in_band": 0.578125, "tokens_rate.above_band": 0.9821428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017857142857142856 }, { "epoch": 0.9767788666382616, "grad_norm": 104.68609114344093, "learning_rate": 3.901125856609519e-07, "loss": 0.304, "step": 4585, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8851351351351351, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.8966789667896679, "success_rate.epoch.env.math": 0.9604454685099847, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7979260089686099, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8677343532169339, "success_rate.epoch.global": 0.8820703210307927, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976968174204355, "tokens_p.mean_in_band": 0.44921875, "tokens_rate.above_band": 0.9933444259567388, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0066555740432612314 }, { "epoch": 0.9778440562420111, "grad_norm": 108.78841907739165, "learning_rate": 3.900900975320185e-07, "loss": 0.5862, "step": 4590, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8851351351351351, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8966789667896679, "success_rate.epoch.env.math": 0.9604606525911709, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7975391498881432, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8674940312427128, "success_rate.epoch.global": 0.881762652705061, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9961947737068966, "tokens_p.mean_below_band": 8.307397365570068e-07, "tokens_p.mean_in_band": 0.4822353957286432, "tokens_rate.above_band": 0.9488752556237219, "tokens_rate.below_band": 0.0002556237218813906, "tokens_rate.in_band": 0.05086912065439673 }, { "epoch": 0.9789092458457606, "grad_norm": 63.496836056276955, "learning_rate": 3.900675874736383e-07, "loss": 0.306, "step": 4595, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8851351351351351, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8968312453942521, "success_rate.epoch.env.math": 0.9605061349693251, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7972633342641721, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.867486935366237, "success_rate.epoch.global": 0.8816735672259751, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.989375, "tokens_p.mean_in_band": 0.5602678571428571, "tokens_rate.above_band": 0.8771929824561403, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12280701754385964 }, { "epoch": 0.97997443544951, "grad_norm": 61.82864261391061, "learning_rate": 3.900450555022164e-07, "loss": 0.2415, "step": 4600, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8969830757910228, "success_rate.epoch.env.math": 0.9605817068503636, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7973199329983249, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8675828359248992, "success_rate.epoch.global": 0.8818023508924684, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992559523809523, "tokens_p.mean_in_band": 0.7760416666666666, "tokens_rate.above_band": 0.9949238578680203, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005076142131979695 }, { "epoch": 0.9810396250532595, "grad_norm": 124.27735162344112, "learning_rate": 3.900225016341739e-07, "loss": 0.3287, "step": 4605, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9474940334128878, "success_rate.epoch.env.logic": 0.8971344599559148, "success_rate.epoch.env.math": 0.9605967865340475, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7971014492753623, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8675895261508871, "success_rate.epoch.global": 0.8817134159599913, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964755639097744, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.9708029197080292, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029197080291970802 }, { "epoch": 0.9821048146570089, "grad_norm": 396.5823067791187, "learning_rate": 3.8999992588594804e-07, "loss": 0.2374, "step": 4610, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9730639730639731, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9474940334128878, "success_rate.epoch.env.logic": 0.8972853998532648, "success_rate.epoch.env.math": 0.9606720122184039, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7969359331476323, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8676033124731578, "success_rate.epoch.global": 0.8817332754126846, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969800420168067, "tokens_p.mean_in_band": 0.6180555555555556, "tokens_rate.above_band": 0.9635627530364372, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03643724696356275 }, { "epoch": 0.9831700042607584, "grad_norm": 117.84430981737731, "learning_rate": 3.899773282739917e-07, "loss": 0.2983, "step": 4615, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9731543624161074, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9474940334128878, "success_rate.epoch.env.logic": 0.8973607038123167, "success_rate.epoch.env.math": 0.9607469512195121, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7971054828833843, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8676406017502529, "success_rate.epoch.global": 0.8818615751789977, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946646341463414, "tokens_p.mean_in_band": 0.8854166666666666, "tokens_rate.above_band": 0.9820359281437125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017964071856287425 }, { "epoch": 0.9842351938645079, "grad_norm": 33.14586394289726, "learning_rate": 3.89954708814774e-07, "loss": 0.3016, "step": 4620, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9731543624161074, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9476190476190476, "success_rate.epoch.env.logic": 0.8973607038123167, "success_rate.epoch.env.math": 0.9608216051730696, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7967185761957731, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8676606858172906, "success_rate.epoch.global": 0.8817728651928912, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969919786096256, "tokens_p.mean_in_band": 0.4407552083333333, "tokens_rate.above_band": 0.9396984924623115, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06030150753768844 }, { "epoch": 0.9853003834682573, "grad_norm": 56.180461328817415, "learning_rate": 3.8993206752477977e-07, "loss": 0.5396, "step": 4625, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9731543624161074, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9476190476190476, "success_rate.epoch.env.logic": 0.8975859546452085, "success_rate.epoch.env.math": 0.9608662613981763, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7968880244512364, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8677033511262852, "success_rate.epoch.global": 0.8819008443386014, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960526315789474, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.979381443298969, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020618556701030927 }, { "epoch": 0.9863655730720068, "grad_norm": 249.9598395808165, "learning_rate": 3.8990940442050986e-07, "loss": 0.2886, "step": 4630, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9476190476190476, "success_rate.epoch.env.logic": 0.8976608187134503, "success_rate.epoch.env.math": 0.960910815939279, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7965584235359423, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8677005137273103, "success_rate.epoch.global": 0.8818122837370242, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964247881355932, "tokens_p.mean_in_band": 0.5497159090909091, "tokens_rate.above_band": 0.9554655870445344, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044534412955465584 }, { "epoch": 0.9874307626757562, "grad_norm": 165.16161308596384, "learning_rate": 3.8988671951848085e-07, "loss": 0.3718, "step": 4635, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9476190476190476, "success_rate.epoch.env.logic": 0.8977355734112491, "success_rate.epoch.env.math": 0.9609700644183403, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7962860310421286, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8676879328803145, "success_rate.epoch.global": 0.8817239144523655, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994724025974026, "tokens_p.mean_in_band": 0.5266927083333334, "tokens_rate.above_band": 0.927710843373494, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07228915662650602 }, { "epoch": 0.9884959522795057, "grad_norm": 76.65230786453098, "learning_rate": 3.8986401283522554e-07, "loss": 0.4237, "step": 4640, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.973421926910299, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9477434679334917, "success_rate.epoch.env.logic": 0.8978847556528081, "success_rate.epoch.env.math": 0.9609996213555472, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7959014123511493, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8676912920897037, "success_rate.epoch.global": 0.8816357358653432, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.888888888888889, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998995983935743, "tokens_p.mean_in_band": 0.489013671875, "tokens_rate.above_band": 0.9841897233201581, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015810276679841896 }, { "epoch": 0.9895611418832552, "grad_norm": 110.83515572846225, "learning_rate": 3.898412843872922e-07, "loss": 0.3044, "step": 4645, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.973421926910299, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9481132075471698, "success_rate.epoch.env.logic": 0.8978847556528081, "success_rate.epoch.env.math": 0.9610291335603481, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7961272475795297, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.867782397630917, "success_rate.epoch.global": 0.8817633110584178, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998046875, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.9906263314870047, "grad_norm": 142.16975994360337, "learning_rate": 3.898185341912453e-07, "loss": 0.4906, "step": 4650, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9481132075471698, "success_rate.epoch.env.logic": 0.8979591836734694, "success_rate.epoch.env.math": 0.9606953892668179, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7964088397790056, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.8677924233392027, "success_rate.epoch.global": 0.8817829457364341, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996641074856046, "tokens_p.mean_in_band": 0.50703125, "tokens_rate.above_band": 0.9904942965779467, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009505703422053232 }, { "epoch": 0.9916915210907542, "grad_norm": 34.441900158692974, "learning_rate": 3.897957622636649e-07, "loss": 0.1985, "step": 4655, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9803921568627451, "success_rate.epoch.env.ded": 0.9482352941176471, "success_rate.epoch.env.logic": 0.8981077147016011, "success_rate.epoch.env.math": 0.9607547169811321, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7965212589729431, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.8678682889182577, "success_rate.epoch.global": 0.8819100881910088, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973653395784543, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9976635514018691, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002336448598130841 }, { "epoch": 0.9927567106945037, "grad_norm": 105.86226379307871, "learning_rate": 3.8977296862114704e-07, "loss": 0.4194, "step": 4660, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9803921568627451, "success_rate.epoch.env.ded": 0.9482352941176471, "success_rate.epoch.env.logic": 0.8982558139534884, "success_rate.epoch.env.math": 0.9607695209354961, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7965821389195149, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8679216187232842, "success_rate.epoch.global": 0.8819295229909755, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975453172205438, "tokens_p.mean_in_band": 0.6919642857142857, "tokens_rate.above_band": 0.9792899408284024, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020710059171597635 }, { "epoch": 0.9938219002982531, "grad_norm": 119.19625646197002, "learning_rate": 3.897501532803037e-07, "loss": 0.4187, "step": 4665, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9803921568627451, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.8977519941986947, "success_rate.epoch.env.math": 0.9604221635883905, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7967502065546681, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8678815594661976, "success_rate.epoch.global": 0.8818415969092079, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987026862026862, "tokens_p.mean_in_band": 0.6143973214285714, "tokens_rate.above_band": 0.975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025 }, { "epoch": 0.9948870899020026, "grad_norm": 160.6021871896516, "learning_rate": 3.8972731625776244e-07, "loss": 0.343, "step": 4670, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.8978260869565218, "success_rate.epoch.env.math": 0.9604817463304479, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7963676389653275, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8679261982406973, "success_rate.epoch.global": 0.8817538593481989, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942354368932039, "tokens_p.mean_in_band": 0.3483664772727273, "tokens_rate.above_band": 0.9035087719298246, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09649122807017543 }, { "epoch": 0.995952279505752, "grad_norm": 371.81707220231846, "learning_rate": 3.897044575701667e-07, "loss": 0.4426, "step": 4675, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9735099337748344, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.8979000724112962, "success_rate.epoch.env.math": 0.9601653513716648, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7962606543854825, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8678998169259036, "success_rate.epoch.global": 0.8816663097022918, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9940378289473685, "tokens_p.mean_in_band": 0.5005095108695652, "tokens_rate.above_band": 0.8685714285714285, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13142857142857142 }, { "epoch": 0.9970174691095015, "grad_norm": 447.35902529599747, "learning_rate": 3.8968157723417574e-07, "loss": 0.4631, "step": 4680, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.8859060402684564, "success_rate.epoch.env.agentgym:sciworld": 0.9735973597359736, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.8979000724112962, "success_rate.epoch.env.math": 0.9602400600150037, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7964844822850865, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8679349044262746, "success_rate.epoch.global": 0.8817928968763372, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977477477477478, "tokens_p.mean_in_band": 0.78515625, "tokens_rate.above_band": 0.9964093357271095, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003590664272890485 }, { "epoch": 0.998082658713251, "grad_norm": 170.68042219194936, "learning_rate": 3.896586752664645e-07, "loss": 0.6085, "step": 4685, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.8866666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9735973597359736, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.8972503617945007, "success_rate.epoch.env.math": 0.9602996254681648, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7961031833150384, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8679157391775955, "success_rate.epoch.global": 0.8815986321863646, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975667104111986, "tokens_p.mean_in_band": 0.3960597826086957, "tokens_rate.above_band": 0.9613120269133726, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03868797308662742 }, { "epoch": 0.9991478483170004, "grad_norm": 184.5098625520693, "learning_rate": 3.89635751683724e-07, "loss": 0.5809, "step": 4690, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.8866666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9735973597359736, "success_rate.epoch.env.agentgym:textcraft": 0.9838709677419355, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.8973246565437455, "success_rate.epoch.env.math": 0.9603293413173652, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.7958904109589041, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.867929889050977, "success_rate.epoch.global": 0.8815115286080273, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995306324110672, "tokens_p.mean_in_band": 0.671630859375, "tokens_rate.above_band": 0.9405204460966543, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05947955390334572 }, { "epoch": 1.00021303792075, "grad_norm": 144.29975023452522, "learning_rate": 3.8961280650266055e-07, "loss": 0.5425, "step": 4695, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979272959183674, "tokens_p.mean_in_band": 0.2652994791666667, "tokens_rate.above_band": 0.9702970297029703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0297029702970297 }, { "epoch": 1.0012782275244994, "grad_norm": 431.7095323972743, "learning_rate": 3.895898397399966e-07, "loss": 0.5209, "step": 4700, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 1.0, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.science": 1.0, "success_rate.epoch.env_macro_mean": 1.0, "success_rate.epoch.global": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992122755524862, "tokens_p.mean_in_band": 0.8058035714285714, "tokens_rate.above_band": 0.9975887013434378, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002411298656562177 }, { "epoch": 1.0023434171282488, "grad_norm": 251.24346855595547, "learning_rate": 3.8956685141247e-07, "loss": 1.0089, "step": 4705, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.6666666666666666, "success_rate.epoch.env.math": 0.8333333333333334, "success_rate.epoch.env.science": 0.3333333333333333, "success_rate.epoch.env_macro_mean": 0.8333333333333334, "success_rate.epoch.global": 0.8, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9947363429438544, "tokens_p.mean_below_band": 2.9976945370435715e-09, "tokens_p.mean_in_band": 0.5964307598039216, "tokens_rate.above_band": 0.9268635724331927, "tokens_rate.below_band": 0.0014064697609001407, "tokens_rate.in_band": 0.07172995780590717 }, { "epoch": 1.0034086067319983, "grad_norm": 105.24981153941961, "learning_rate": 3.895438415368346e-07, "loss": 1.0138, "step": 4710, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.6666666666666666, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.science": 0.375, "success_rate.epoch.env_macro_mean": 0.8488095238095238, "success_rate.epoch.global": 0.7666666666666667, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.7999999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9921171171171171, "tokens_p.mean_in_band": 0.4274796195652174, "tokens_rate.above_band": 0.8283582089552238, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17164179104477612 }, { "epoch": 1.0044737963357477, "grad_norm": 216.0384760527122, "learning_rate": 3.895208101298598e-07, "loss": 0.9622, "step": 4715, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.science": 0.36363636363636365, "success_rate.epoch.env_macro_mean": 0.8172541743970315, "success_rate.epoch.global": 0.725, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.5208333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9988245784695201, "tokens_p.mean_in_band": 0.5294596354166666, "tokens_rate.above_band": 0.969811320754717, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03018867924528302 }, { "epoch": 1.0055389859394972, "grad_norm": 109.8191325563643, "learning_rate": 3.894977572083308e-07, "loss": 0.8322, "step": 4720, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.4, "success_rate.epoch.env.math": 0.8, "success_rate.epoch.env.science": 0.5555555555555556, "success_rate.epoch.env_macro_mean": 0.8222222222222222, "success_rate.epoch.global": 0.72, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9980439642324889, "tokens_p.mean_in_band": 0.47028186274509803, "tokens_rate.above_band": 0.9293628808864266, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07063711911357341 }, { "epoch": 1.0066041755432467, "grad_norm": 101.64979341645656, "learning_rate": 3.894746827890482e-07, "loss": 0.699, "step": 4725, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.4, "success_rate.epoch.env.math": 0.8125, "success_rate.epoch.env.science": 0.5416666666666666, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861574074074074, "success_rate.epoch.global": 0.7166666666666667, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9998349471830986, "tokens_p.mean_in_band": 0.44388020833333336, "tokens_rate.above_band": 0.9742710120068611, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025728987993138937 }, { "epoch": 1.0076693651469961, "grad_norm": 71.71592240162255, "learning_rate": 3.8945158688882864e-07, "loss": 0.6099, "step": 4730, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.85, "success_rate.epoch.env.science": 0.5714285714285714, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616402116402117, "success_rate.epoch.global": 0.7285714285714285, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951645496535797, "tokens_p.mean_in_band": 0.6534778225806451, "tokens_rate.above_band": 0.9331896551724138, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0668103448275862 }, { "epoch": 1.0087345547507456, "grad_norm": 123.03568597455691, "learning_rate": 3.8942846952450415e-07, "loss": 0.7393, "step": 4735, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.76, "success_rate.epoch.env.science": 0.5, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8437037037037037, "success_rate.epoch.global": 0.675, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.4, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.4666666666666666, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9960756933744221, "tokens_p.mean_in_band": 0.47421875, "tokens_rate.above_band": 0.941944847605225, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05805515239477504 }, { "epoch": 1.009799744354495, "grad_norm": 508.9809006199772, "learning_rate": 3.8940533071292264e-07, "loss": 0.6637, "step": 4740, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.7931034482758621, "success_rate.epoch.env.science": 0.5135135135135135, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8488833661247455, "success_rate.epoch.global": 0.6888888888888889, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948453608247423, "tokens_p.mean_in_band": 0.53515625, "tokens_rate.above_band": 0.9651741293532339, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03482587064676617 }, { "epoch": 1.0108649339582445, "grad_norm": 82.26163557012896, "learning_rate": 3.8938217047094735e-07, "loss": 0.6302, "step": 4745, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.8181818181818182, "success_rate.epoch.env.science": 0.5, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651515151515152, "success_rate.epoch.global": 0.71, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977361505681818, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9943502824858758, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005649717514124294 }, { "epoch": 1.011930123561994, "grad_norm": 122.4384358762404, "learning_rate": 3.8935898881545743e-07, "loss": 0.6716, "step": 4750, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8108108108108109, "success_rate.epoch.env.science": 0.5, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8810810810810811, "success_rate.epoch.global": 0.7090909090909091, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9906994047619048, "tokens_p.mean_in_band": 0.54931640625, "tokens_rate.above_band": 0.84, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16 }, { "epoch": 1.0129953131657434, "grad_norm": 108.56192483199875, "learning_rate": 3.893357857633474e-07, "loss": 0.6893, "step": 4755, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9090909090909091, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8205128205128205, "success_rate.epoch.env.science": 0.5111111111111111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.874071484071484, "success_rate.epoch.global": 0.7083333333333334, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9989399592944369, "tokens_p.mean_in_band": 0.4557179418103448, "tokens_rate.above_band": 0.980705256154358, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01929474384564205 }, { "epoch": 1.0140605027694929, "grad_norm": 101.09932185155446, "learning_rate": 3.8931256133152754e-07, "loss": 0.6921, "step": 4760, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9166666666666666, "success_rate.epoch.env.logic": 0.45454545454545453, "success_rate.epoch.env.math": 0.8292682926829268, "success_rate.epoch.env.science": 0.52, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720480413895049, "success_rate.epoch.global": 0.7076923076923077, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0006801470588236, "tokens_p.mean_in_band": 0.46293826219512196, "tokens_rate.above_band": 0.9764503159103963, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023549684089603676 }, { "epoch": 1.0151256923732423, "grad_norm": 69.95946542944019, "learning_rate": 3.892893155369236e-07, "loss": 0.5881, "step": 4765, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9230769230769231, "success_rate.epoch.env.logic": 0.46153846153846156, "success_rate.epoch.env.math": 0.8444444444444444, "success_rate.epoch.env.science": 0.5192307692307693, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8748290598290598, "success_rate.epoch.global": 0.7142857142857143, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994926948051948, "tokens_p.mean_in_band": 0.4560810810810811, "tokens_rate.above_band": 0.9614984391259105, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03850156087408949 }, { "epoch": 1.0161908819769918, "grad_norm": 82.82324559157526, "learning_rate": 3.8926604839647696e-07, "loss": 0.5157, "step": 4770, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5333333333333333, "success_rate.epoch.env.math": 0.8541666666666666, "success_rate.epoch.env.science": 0.5357142857142857, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8851785714285715, "success_rate.epoch.global": 0.7266666666666667, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973525747508306, "tokens_p.mean_in_band": 0.4153645833333333, "tokens_rate.above_band": 0.9709677419354839, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02903225806451613 }, { "epoch": 1.0172560715807413, "grad_norm": 95.11877465215886, "learning_rate": 3.892427599271446e-07, "loss": 0.6093, "step": 4775, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8679245283018868, "success_rate.epoch.env.science": 0.5517241379310345, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8848220094804351, "success_rate.epoch.global": 0.7375, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9999257719714965, "tokens_p.mean_in_band": 0.46902901785714285, "tokens_rate.above_band": 0.967816091954023, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03218390804597701 }, { "epoch": 1.0183212611844907, "grad_norm": 118.84020836443663, "learning_rate": 3.8921945014589894e-07, "loss": 0.2974, "step": 4780, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.8771929824561403, "success_rate.epoch.env.science": 0.5806451612903226, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8941965127873447, "success_rate.epoch.global": 0.7529411764705882, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974197247706422, "tokens_p.mean_in_band": 0.6588541666666666, "tokens_rate.above_band": 0.960352422907489, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039647577092511016 }, { "epoch": 1.0193864507882404, "grad_norm": 147.37522023409903, "learning_rate": 3.89196119069728e-07, "loss": 0.4447, "step": 4785, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.8852459016393442, "success_rate.epoch.env.science": 0.578125, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8970889698631825, "success_rate.epoch.global": 0.7611111111111111, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986940298507463, "tokens_p.mean_in_band": 0.5078125, "tokens_rate.above_band": 0.9970238095238095, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002976190476190476 }, { "epoch": 1.0204516403919899, "grad_norm": 32.429467066288986, "learning_rate": 3.891727667156353e-07, "loss": 0.3183, "step": 4790, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6190476190476191, "success_rate.epoch.env.math": 0.890625, "success_rate.epoch.env.science": 0.5909090909090909, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9029153138528139, "success_rate.epoch.global": 0.7736842105263158, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992270318021201, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9860627177700348, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013937282229965157 }, { "epoch": 1.0215168299957393, "grad_norm": 50.749797732298596, "learning_rate": 3.8914939310063983e-07, "loss": 0.4503, "step": 4795, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6521739130434783, "success_rate.epoch.env.math": 0.8955223880597015, "success_rate.epoch.env.science": 0.6, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9076267729674609, "success_rate.epoch.global": 0.78, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955481843575419, "tokens_p.mean_in_band": 0.5564236111111112, "tokens_rate.above_band": 0.9754768392370572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02452316076294278 }, { "epoch": 1.0225820195994888, "grad_norm": 90.48850129165183, "learning_rate": 3.891259982417761e-07, "loss": 0.5427, "step": 4800, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.68, "success_rate.epoch.env.math": 0.8873239436619719, "success_rate.epoch.env.science": 0.5945945945945946, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9090489966827995, "success_rate.epoch.global": 0.7761904761904762, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.991875, "tokens_p.mean_in_band": 0.3108723958333333, "tokens_rate.above_band": 0.8620689655172413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13793103448275862 }, { "epoch": 1.0236472092032383, "grad_norm": 76.460497999413, "learning_rate": 3.891025821560942e-07, "loss": 0.5584, "step": 4805, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6923076923076923, "success_rate.epoch.env.math": 0.8947368421052632, "success_rate.epoch.env.science": 0.5974025974025974, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9113018560386982, "success_rate.epoch.global": 0.7818181818181819, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943484042553191, "tokens_p.mean_in_band": 0.5876736111111112, "tokens_rate.above_band": 0.912621359223301, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08737864077669903 }, { "epoch": 1.0247123988069877, "grad_norm": 78.17021514718782, "learning_rate": 3.890791448606596e-07, "loss": 0.3499, "step": 4810, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.7037037037037037, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.science": 0.5875, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9119775132275132, "success_rate.epoch.global": 0.7816593886462883, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.9948224852071006, "tokens_p.mean_in_band": 0.4973958333333333, "tokens_rate.above_band": 0.9337016574585635, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06629834254143646 }, { "epoch": 1.0257775884107372, "grad_norm": 92.1443658600186, "learning_rate": 3.8905568637255313e-07, "loss": 0.5582, "step": 4815, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.7037037037037037, "success_rate.epoch.env.math": 0.8902439024390244, "success_rate.epoch.env.science": 0.5903614457831325, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.912180905192586, "success_rate.epoch.global": 0.7824267782426778, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980757389162561, "tokens_p.mean_in_band": 0.5909090909090909, "tokens_rate.above_band": 0.973621103117506, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026378896882494004 }, { "epoch": 1.0268427780144866, "grad_norm": 223.73644024574008, "learning_rate": 3.890322067088712e-07, "loss": 0.2925, "step": 4820, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.7241379310344828, "success_rate.epoch.env.math": 0.8941176470588236, "success_rate.epoch.env.science": 0.5930232558139535, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.914877883390726, "success_rate.epoch.global": 0.7871485943775101, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957096474953617, "tokens_p.mean_in_band": 0.74765625, "tokens_rate.above_band": 0.9817850637522769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018214936247723135 }, { "epoch": 1.027907967618236, "grad_norm": 70.5536113220452, "learning_rate": 3.890087058867258e-07, "loss": 0.4808, "step": 4825, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.7333333333333333, "success_rate.epoch.env.math": 0.8953488372093024, "success_rate.epoch.env.science": 0.6067415730337079, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9179868188020788, "success_rate.epoch.global": 0.7953667953667953, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998694120505345, "tokens_p.mean_in_band": 0.818359375, "tokens_rate.above_band": 0.9961277831558567, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003872216844143272 }, { "epoch": 1.0289731572219856, "grad_norm": 30.762366799003015, "learning_rate": 3.8898518392324393e-07, "loss": 0.4171, "step": 4830, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.7419354838709677, "success_rate.epoch.env.math": 0.898876404494382, "success_rate.epoch.env.science": 0.6105263157894737, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9195782648599268, "success_rate.epoch.global": 0.7955390334572491, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9913651315789473, "tokens_p.mean_in_band": 0.4079861111111111, "tokens_rate.above_band": 0.8941176470588236, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10588235294117647 }, { "epoch": 1.030038346825735, "grad_norm": 146.6021634374276, "learning_rate": 3.889616408355683e-07, "loss": 0.4162, "step": 4835, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7575757575757576, "success_rate.epoch.env.math": 0.9010989010989011, "success_rate.epoch.env.science": 0.6, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9206043079727291, "success_rate.epoch.global": 0.7921146953405018, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976250791640279, "tokens_p.mean_in_band": 0.55125, "tokens_rate.above_band": 0.9844139650872819, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015586034912718205 }, { "epoch": 1.0311035364294845, "grad_norm": 24.174367018000552, "learning_rate": 3.889380766408569e-07, "loss": 0.3678, "step": 4840, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7575757575757576, "success_rate.epoch.env.math": 0.9052631578947369, "success_rate.epoch.env.science": 0.6132075471698113, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9223414883692937, "success_rate.epoch.global": 0.7958477508650519, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9166666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942010309278351, "tokens_p.mean_in_band": 0.560546875, "tokens_rate.above_band": 0.9238095238095239, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0761904761904762 }, { "epoch": 1.032168726033234, "grad_norm": 153.3969011372809, "learning_rate": 3.8891449135628333e-07, "loss": 0.3122, "step": 4845, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7777777777777778, "success_rate.epoch.env.math": 0.9072164948453608, "success_rate.epoch.env.science": 0.6018518518518519, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9234214545527621, "success_rate.epoch.global": 0.7959866220735786, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977448453608248, "tokens_p.mean_in_band": 0.3815789473684211, "tokens_rate.above_band": 0.9107981220657277, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0892018779342723 }, { "epoch": 1.0332339156369834, "grad_norm": 82.50481899376963, "learning_rate": 3.8889088499903615e-07, "loss": 0.3598, "step": 4850, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7837837837837838, "success_rate.epoch.env.math": 0.9090909090909091, "success_rate.epoch.env.science": 0.6052631578947368, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.924550627182206, "success_rate.epoch.global": 0.7961165048543689, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9926985981308412, "tokens_p.mean_in_band": 0.466796875, "tokens_rate.above_band": 0.8699186991869918, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13008130081300814 }, { "epoch": 1.0342991052407329, "grad_norm": 56.89804470088932, "learning_rate": 3.8886725758631956e-07, "loss": 0.5136, "step": 4855, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.7948717948717948, "success_rate.epoch.env.math": 0.9117647058823529, "success_rate.epoch.env.science": 0.6153846153846154, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9272021116138763, "success_rate.epoch.global": 0.8025078369905956, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999169921875, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.0353642948444823, "grad_norm": 129.15884002225232, "learning_rate": 3.8884360913535316e-07, "loss": 0.4229, "step": 4860, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8, "success_rate.epoch.env.math": 0.9074074074074074, "success_rate.epoch.env.science": 0.625, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9282407407407408, "success_rate.epoch.global": 0.8054711246200608, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995398773006135, "tokens_p.mean_in_band": 0.721875, "tokens_rate.above_band": 0.9702380952380952, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02976190476190476 }, { "epoch": 1.0364294844482318, "grad_norm": 51.30541733272511, "learning_rate": 3.888199396633717e-07, "loss": 0.3944, "step": 4865, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.813953488372093, "success_rate.epoch.env.math": 0.9009009009009009, "success_rate.epoch.env.science": 0.6341463414634146, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.929900073073641, "success_rate.epoch.global": 0.8082595870206489, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989530988274706, "tokens_p.mean_in_band": 0.49462890625, "tokens_rate.above_band": 0.9867768595041322, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013223140495867768 }, { "epoch": 1.0374946740519813, "grad_norm": 97.00191312932553, "learning_rate": 3.887962491876253e-07, "loss": 0.4166, "step": 4870, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.813953488372093, "success_rate.epoch.env.math": 0.9026548672566371, "success_rate.epoch.env.science": 0.640625, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.930723335562873, "success_rate.epoch.global": 0.8108882521489972, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976181402439024, "tokens_p.mean_in_band": 0.584375, "tokens_rate.above_band": 0.9924357034795764, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007564296520423601 }, { "epoch": 1.0385598636557307, "grad_norm": 559.6773731772412, "learning_rate": 3.887725377253794e-07, "loss": 0.3291, "step": 4875, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8, "success_rate.epoch.env.math": 0.9051724137931034, "success_rate.epoch.env.science": 0.6307692307692307, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9290487099107789, "success_rate.epoch.global": 0.807799442896936, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9980066872427984, "tokens_p.mean_in_band": 0.553886217948718, "tokens_rate.above_band": 0.9739478957915831, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026052104208416832 }, { "epoch": 1.0396250532594802, "grad_norm": 97.8107143834208, "learning_rate": 3.887488052939148e-07, "loss": 0.4862, "step": 4880, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8125, "success_rate.epoch.env.math": 0.9067796610169492, "success_rate.epoch.env.science": 0.6444444444444445, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9318269560006849, "success_rate.epoch.global": 0.8130081300813008, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9933252427184466, "tokens_p.mean_in_band": 0.725, "tokens_rate.above_band": 0.911504424778761, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08849557522123894 }, { "epoch": 1.0406902428632296, "grad_norm": 64.6257125792207, "learning_rate": 3.8872505191052755e-07, "loss": 0.3941, "step": 4885, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8163265306122449, "success_rate.epoch.env.math": 0.907563025210084, "success_rate.epoch.env.science": 0.6453900709219859, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9323825081289769, "success_rate.epoch.global": 0.8126649076517151, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986620795107034, "tokens_p.mean_in_band": 0.5478515625, "tokens_rate.above_band": 0.9533527696793003, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04664723032069971 }, { "epoch": 1.041755432466979, "grad_norm": 65.91021777134186, "learning_rate": 3.8870127759252883e-07, "loss": 0.4176, "step": 4890, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.82, "success_rate.epoch.env.math": 0.9098360655737705, "success_rate.epoch.env.science": 0.636986301369863, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9321367821489087, "success_rate.epoch.global": 0.8097686375321337, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9957107843137255, "tokens_p.mean_in_band": 0.530078125, "tokens_rate.above_band": 0.884393063583815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11560693641618497 }, { "epoch": 1.0428206220707286, "grad_norm": 138.3148173872502, "learning_rate": 3.886774823572453e-07, "loss": 0.3484, "step": 4895, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8235294117647058, "success_rate.epoch.env.math": 0.912, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.64, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9392773773541037, "success_rate.epoch.global": 0.8120300751879699, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921465968586387, "tokens_p.mean_below_band": 4.602043190971017e-10, "tokens_p.mean_in_band": 0.7078125, "tokens_rate.above_band": 0.9455445544554455, "tokens_rate.below_band": 0.0049504950495049506, "tokens_rate.in_band": 0.04950495049504951 }, { "epoch": 1.043885811674478, "grad_norm": 163.97837891873726, "learning_rate": 3.8865366622201865e-07, "loss": 0.4808, "step": 4900, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8269230769230769, "success_rate.epoch.env.math": 0.9069767441860465, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6516129032258065, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9401849512241239, "success_rate.epoch.global": 0.8141809290953546, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9871134020618557, "tokens_p.mean_in_band": 0.64501953125, "tokens_rate.above_band": 0.9238095238095239, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0761904761904762 }, { "epoch": 1.0449510012782275, "grad_norm": 82.54680181501901, "learning_rate": 3.8862982920420595e-07, "loss": 0.4513, "step": 4905, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9583333333333334, "success_rate.epoch.env.logic": 0.8148148148148148, "success_rate.epoch.env.math": 0.9083969465648855, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6477987421383647, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9390312578955817, "success_rate.epoch.global": 0.8114558472553699, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.000828313253012, "tokens_p.mean_in_band": 0.4991179435483871, "tokens_rate.above_band": 0.9639953542392566, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036004645760743324 }, { "epoch": 1.046016190881977, "grad_norm": 85.50658915223514, "learning_rate": 3.886059713211795e-07, "loss": 0.3659, "step": 4910, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.96, "success_rate.epoch.env.logic": 0.8245614035087719, "success_rate.epoch.env.math": 0.9097744360902256, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6481481481481481, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9402258170679224, "success_rate.epoch.global": 0.8135198135198135, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967447916666666, "tokens_p.mean_in_band": 0.6534090909090909, "tokens_rate.above_band": 0.9751693002257337, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024830699774266364 }, { "epoch": 1.0470813804857264, "grad_norm": 72.46491896122384, "learning_rate": 3.8858209259032677e-07, "loss": 0.3633, "step": 4915, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.819672131147541, "success_rate.epoch.env.math": 0.9104477611940298, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6545454545454545, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9405639825841352, "success_rate.epoch.global": 0.8154897494305239, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992008196721311, "tokens_p.mean_in_band": 0.46510416666666665, "tokens_rate.above_band": 0.9807073954983923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01929260450160772 }, { "epoch": 1.0481465700894759, "grad_norm": 204.420510141689, "learning_rate": 3.885581930290503e-07, "loss": 0.4072, "step": 4920, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.828125, "success_rate.epoch.env.math": 0.9117647058823529, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6506024096385542, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9412231889530791, "success_rate.epoch.global": 0.8173719376391982, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993636877828054, "tokens_p.mean_in_band": 0.728125, "tokens_rate.above_band": 0.9888143176733781, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011185682326621925 }, { "epoch": 1.0492117596932253, "grad_norm": 84.8321401295415, "learning_rate": 3.88534272654768e-07, "loss": 0.4457, "step": 4925, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.828125, "success_rate.epoch.env.math": 0.9148936170212766, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6488095238095238, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8958901003448876, "success_rate.epoch.global": 0.8169934640522876, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953044041450777, "tokens_p.mean_in_band": 0.516858552631579, "tokens_rate.above_band": 0.910377358490566, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08962264150943396 }, { "epoch": 1.0502769492969748, "grad_norm": 104.41239993017396, "learning_rate": 3.8851033148491277e-07, "loss": 0.36, "step": 4930, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8307692307692308, "success_rate.epoch.env.math": 0.916083916083916, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6514285714285715, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8964767892040619, "success_rate.epoch.global": 0.8166311300639659, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9944661458333334, "tokens_p.mean_in_band": 0.6220703125, "tokens_rate.above_band": 0.9230769230769231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07692307692307693 }, { "epoch": 1.0513421389007243, "grad_norm": 58.52264459697979, "learning_rate": 3.8848636953693293e-07, "loss": 0.3306, "step": 4935, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8333333333333334, "success_rate.epoch.env.math": 0.9183673469387755, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6611111111111111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.897797704940562, "success_rate.epoch.global": 0.8204592901878914, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919241573033708, "tokens_p.mean_in_band": 0.8388671875, "tokens_rate.above_band": 0.956989247311828, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043010752688172046 }, { "epoch": 1.0524073285044737, "grad_norm": 104.52778488318644, "learning_rate": 3.8846238682829165e-07, "loss": 0.3454, "step": 4940, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8208955223880597, "success_rate.epoch.env.math": 0.9205298013245033, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6593406593406593, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8967026314560168, "success_rate.epoch.global": 0.820040899795501, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987864077669902, "tokens_p.mean_in_band": 0.5056818181818182, "tokens_rate.above_band": 0.965625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034375 }, { "epoch": 1.0534725181082232, "grad_norm": 44.45873111286777, "learning_rate": 3.8843838337646735e-07, "loss": 0.383, "step": 4945, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8235294117647058, "success_rate.epoch.env.math": 0.9210526315789473, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6595744680851063, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8970108613083383, "success_rate.epoch.global": 0.8196392785571143, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9900662251655629, "tokens_p.mean_in_band": 0.5162259615384616, "tokens_rate.above_band": 0.9207317073170732, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07926829268292683 }, { "epoch": 1.0545377077119726, "grad_norm": 147.0472720252295, "learning_rate": 3.8841435919895364e-07, "loss": 0.259, "step": 4950, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8285714285714286, "success_rate.epoch.env.math": 0.9225806451612903, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6614583333333334, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8977793972753649, "success_rate.epoch.global": 0.8212180746561886, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978932584269663, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9816176470588235, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01838235294117647 }, { "epoch": 1.055602897315722, "grad_norm": 21.701993238466507, "learning_rate": 3.883903143132592e-07, "loss": 0.2104, "step": 4955, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9642857142857143, "success_rate.epoch.env.logic": 0.8309859154929577, "success_rate.epoch.env.math": 0.9240506329113924, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6615384615384615, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8831085506874419, "success_rate.epoch.global": 0.8208092485549133, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9921052631578947, "tokens_p.mean_in_band": 0.7532784598214286, "tokens_rate.above_band": 0.9223300970873787, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07766990291262135 }, { "epoch": 1.0566680869194716, "grad_norm": 157.59482989962441, "learning_rate": 3.8836624873690757e-07, "loss": 0.3333, "step": 4960, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9642857142857143, "success_rate.epoch.env.logic": 0.8356164383561644, "success_rate.epoch.env.math": 0.9245283018867925, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6716417910447762, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8844914162642528, "success_rate.epoch.global": 0.8241965973534972, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998, "tokens_p.mean_in_band": 0.633203125, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 1.057733276523221, "grad_norm": 48.53878482620712, "learning_rate": 3.883421624874378e-07, "loss": 0.2178, "step": 4965, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9655172413793104, "success_rate.epoch.env.logic": 0.8356164383561644, "success_rate.epoch.env.math": 0.91875, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6842105263157895, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8852206853986, "success_rate.epoch.global": 0.8256029684601113, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958333333333333, "tokens_p.mean_in_band": 0.7588975694444444, "tokens_rate.above_band": 0.9693877551020408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030612244897959183 }, { "epoch": 1.0587984661269707, "grad_norm": 88.66200637255409, "learning_rate": 3.8831805558240367e-07, "loss": 0.2159, "step": 4970, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9655172413793104, "success_rate.epoch.env.logic": 0.8356164383561644, "success_rate.epoch.env.math": 0.9212121212121213, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6872037914691943, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8857166296136477, "success_rate.epoch.global": 0.8287795992714025, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974376114081996, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.0598636557307202, "grad_norm": 221.96347487104146, "learning_rate": 3.882939280393741e-07, "loss": 0.4282, "step": 4975, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9666666666666667, "success_rate.epoch.env.logic": 0.8356164383561644, "success_rate.epoch.env.math": 0.9216867469879518, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6912442396313364, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8862315840886776, "success_rate.epoch.global": 0.8297491039426523, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9967749110320284, "tokens_p.mean_in_band": 0.6456801470588235, "tokens_rate.above_band": 0.9706390328151986, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02936096718480138 }, { "epoch": 1.0609288453344696, "grad_norm": 89.16341137533541, "learning_rate": 3.8826977987593314e-07, "loss": 0.2891, "step": 4980, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9666666666666667, "success_rate.epoch.env.logic": 0.84, "success_rate.epoch.env.math": 0.9221556886227545, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6936936936936937, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8868953983924045, "success_rate.epoch.global": 0.8309859154929577, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976652298850575, "tokens_p.mean_in_band": 0.6651785714285714, "tokens_rate.above_band": 0.9802816901408451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01971830985915493 }, { "epoch": 1.061994034938219, "grad_norm": 91.32410024472037, "learning_rate": 3.882456111096797e-07, "loss": 0.3006, "step": 4985, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.967741935483871, "success_rate.epoch.env.logic": 0.84, "success_rate.epoch.env.math": 0.9226190476190477, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6902654867256637, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8867236184692652, "success_rate.epoch.global": 0.8304498269896193, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995547385620915, "tokens_p.mean_in_band": 0.646728515625, "tokens_rate.above_band": 0.9896507115135834, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01034928848641656 }, { "epoch": 1.0630592245419686, "grad_norm": 62.97554115020096, "learning_rate": 3.8822142175822793e-07, "loss": 0.2876, "step": 4990, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.967741935483871, "success_rate.epoch.env.logic": 0.84, "success_rate.epoch.env.math": 0.9239766081871345, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6926406926406926, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8870629608768211, "success_rate.epoch.global": 0.8316326530612245, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937845303867403, "tokens_p.mean_in_band": 0.5815972222222222, "tokens_rate.above_band": 0.9526315789473684, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04736842105263158 }, { "epoch": 1.064124414145718, "grad_norm": 111.485715575201, "learning_rate": 3.881972118392068e-07, "loss": 0.2385, "step": 4995, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8421052631578947, "success_rate.epoch.env.math": 0.9252873563218391, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6949152542372882, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8848310188227596, "success_rate.epoch.global": 0.8311036789297659, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9884868421052632, "tokens_p.mean_below_band": 9.424984455108643e-07, "tokens_p.mean_in_band": 0.47788438688212925, "tokens_rate.above_band": 0.7373134328358208, "tokens_rate.below_band": 0.0009950248756218905, "tokens_rate.in_band": 0.26169154228855723 }, { "epoch": 1.0651896037494675, "grad_norm": 44.55584774499437, "learning_rate": 3.8817298137026026e-07, "loss": 0.1276, "step": 5000, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8441558441558441, "success_rate.epoch.env.math": 0.9257142857142857, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7049180327868853, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8859655905445772, "success_rate.epoch.global": 0.8338815789473685, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9897540983606558, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9838709677419355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016129032258064516 }, { "epoch": 1.066254793353217, "grad_norm": 37.44887426044418, "learning_rate": 3.881487303690473e-07, "loss": 0.3014, "step": 5005, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8518518518518519, "success_rate.epoch.env.math": 0.9261363636363636, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7096774193548387, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8871362698342171, "success_rate.epoch.global": 0.8365695792880259, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974112426035503, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9941176470588236, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0058823529411764705 }, { "epoch": 1.0673199829569664, "grad_norm": 63.786733274596294, "learning_rate": 3.8812445885324205e-07, "loss": 0.162, "step": 5010, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8518518518518519, "success_rate.epoch.env.math": 0.9269662921348315, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7086614173228346, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8871193540584411, "success_rate.epoch.global": 0.835725677830941, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.9939453125, "tokens_p.mean_in_band": 0.5845424107142857, "tokens_rate.above_band": 0.9195402298850575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08045977011494253 }, { "epoch": 1.0683851725607159, "grad_norm": 97.63486070867486, "learning_rate": 3.881001668405332e-07, "loss": 0.2328, "step": 5015, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8536585365853658, "success_rate.epoch.env.math": 0.9297297297297298, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7109375, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8877417363316755, "success_rate.epoch.global": 0.8383045525902669, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9896875, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9803921568627451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0196078431372549 }, { "epoch": 1.0694503621644653, "grad_norm": 156.34628875119938, "learning_rate": 3.8807585434862475e-07, "loss": 0.3879, "step": 5020, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8554216867469879, "success_rate.epoch.env.math": 0.93048128342246, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7115384615384616, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.888024978640113, "success_rate.epoch.global": 0.839258114374034, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966384462151394, "tokens_p.mean_in_band": 0.4739583333333333, "tokens_rate.above_band": 0.9881889763779528, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011811023622047244 }, { "epoch": 1.0705155517682148, "grad_norm": 59.72047243352351, "learning_rate": 3.8805152139523536e-07, "loss": 0.2054, "step": 5025, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8571428571428571, "success_rate.epoch.env.math": 0.9315789473684211, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7132075471698113, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8884329713649475, "success_rate.epoch.global": 0.8401826484018264, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9448818897637795, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05511811023622047 }, { "epoch": 1.0715807413719642, "grad_norm": 816.1240272641251, "learning_rate": 3.8802716799809885e-07, "loss": 0.3993, "step": 5030, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8505747126436781, "success_rate.epoch.env.math": 0.9270833333333334, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7111111111111111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8872365900383141, "success_rate.epoch.global": 0.8365817091454273, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.5888888888888889, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0000521702838063, "tokens_p.mean_in_band": 0.4214564732142857, "tokens_rate.above_band": 0.9553429027113237, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044657097288676235 }, { "epoch": 1.0726459309757137, "grad_norm": 154.32847542752535, "learning_rate": 3.880027941749636e-07, "loss": 0.1867, "step": 5035, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8571428571428571, "success_rate.epoch.env.math": 0.9285714285714286, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7111111111111111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8879689754689756, "success_rate.epoch.global": 0.8387573964497042, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972278225806451, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.0737111205794632, "grad_norm": 94.40867589017665, "learning_rate": 3.8797839994359325e-07, "loss": 0.3767, "step": 5040, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.8586956521739131, "success_rate.epoch.env.math": 0.9292929292929293, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7090909090909091, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8881642512077296, "success_rate.epoch.global": 0.8381924198250729, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947076612903226, "tokens_p.mean_in_band": 0.5738636363636364, "tokens_rate.above_band": 0.9712793733681462, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028720626631853787 }, { "epoch": 1.0747763101832126, "grad_norm": 79.92737662068915, "learning_rate": 3.8795398532176604e-07, "loss": 0.2173, "step": 5045, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8602150537634409, "success_rate.epoch.env.math": 0.9303482587064676, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7132616487455197, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8889395241033635, "success_rate.epoch.global": 0.8405172413793104, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982638888888888, "tokens_p.mean_in_band": 0.611328125, "tokens_rate.above_band": 0.9908256880733946, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009174311926605505 }, { "epoch": 1.075841499786962, "grad_norm": 43.49103723119927, "learning_rate": 3.879295503272753e-07, "loss": 0.2848, "step": 5050, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8617021276595744, "success_rate.epoch.env.math": 0.9310344827586207, "success_rate.epoch.env.sat": 0.25, "success_rate.epoch.env.science": 0.7183098591549296, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8820202672873964, "success_rate.epoch.global": 0.8413597733711048, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980945121951219, "tokens_p.mean_in_band": 0.6932744565217391, "tokens_rate.above_band": 0.9661266568483063, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033873343151693665 }, { "epoch": 1.0769066893907115, "grad_norm": 218.3599509629281, "learning_rate": 3.879050949779289e-07, "loss": 0.19, "step": 5055, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8645833333333334, "success_rate.epoch.env.math": 0.9313725490196079, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7222222222222222, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8781231431966726, "success_rate.epoch.global": 0.8421787709497207, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960164835164835, "tokens_p.mean_in_band": 0.6979166666666666, "tokens_rate.above_band": 0.974304068522484, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02569593147751606 }, { "epoch": 1.077971878994461, "grad_norm": 0.0, "learning_rate": 3.878806192915498e-07, "loss": 0.1457, "step": 5060, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.865979381443299, "success_rate.epoch.env.math": 0.9320388349514563, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7241379310344828, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8787818719885165, "success_rate.epoch.global": 0.8443526170798898, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947959183673469, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9983700081499592, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016299918500407497 }, { "epoch": 1.0790370685982105, "grad_norm": 56.46685685501144, "learning_rate": 3.878561232859758e-07, "loss": 0.1699, "step": 5065, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.865979381443299, "success_rate.epoch.env.math": 0.9333333333333333, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7226027397260274, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8787599908133731, "success_rate.epoch.global": 0.845108695652174, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983498349834984, "tokens_p.mean_in_band": 0.58203125, "tokens_rate.above_band": 0.9869706840390879, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013029315960912053 }, { "epoch": 1.08010225820196, "grad_norm": 97.10832917549202, "learning_rate": 3.8783160697905923e-07, "loss": 0.2793, "step": 5070, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.865979381443299, "success_rate.epoch.env.math": 0.933649289099526, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7263513513513513, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8793953129951643, "success_rate.epoch.global": 0.8471849865951743, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992919670442842, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9989711934156379, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00102880658436214 }, { "epoch": 1.0811674478057094, "grad_norm": 61.785518551137216, "learning_rate": 3.878070703886676e-07, "loss": 0.2096, "step": 5075, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8585858585858586, "success_rate.epoch.env.math": 0.9348837209302325, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7266666666666667, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8788640606577626, "success_rate.epoch.global": 0.8465608465608465, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948275862068966, "tokens_p.mean_in_band": 0.5502232142857143, "tokens_rate.above_band": 0.9539473684210527, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046052631578947366 }, { "epoch": 1.0822326374094589, "grad_norm": 2.873540350938449, "learning_rate": 3.8778251353268283e-07, "loss": 0.2163, "step": 5080, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8627450980392157, "success_rate.epoch.env.math": 0.9360730593607306, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7284768211920529, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8795148545131481, "success_rate.epoch.global": 0.8485639686684073, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996408045977011, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9775280898876404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02247191011235955 }, { "epoch": 1.0832978270132083, "grad_norm": 96.3451665318494, "learning_rate": 3.8775793642900186e-07, "loss": 0.2786, "step": 5085, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8627450980392157, "success_rate.epoch.env.math": 0.9372197309417041, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7296416938110749, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8797249948949659, "success_rate.epoch.global": 0.8492268041237113, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9920804794520548, "tokens_p.mean_in_band": 0.621484375, "tokens_rate.above_band": 0.9358974358974359, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0641025641025641 }, { "epoch": 1.0843630166169578, "grad_norm": 224.72004536662845, "learning_rate": 3.877333390955363e-07, "loss": 0.3518, "step": 5090, "success_rate.epoch.env.abd": 0.9767441860465116, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8640776699029126, "success_rate.epoch.env.math": 0.9380530973451328, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7322580645161291, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8781682696844212, "success_rate.epoch.global": 0.8498727735368957, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972519729425028, "tokens_p.mean_below_band": 2.812594175338745e-07, "tokens_p.mean_in_band": 0.10240558769463667, "tokens_rate.above_band": 0.2765824758341129, "tokens_rate.below_band": 0.002494543186778921, "tokens_rate.in_band": 0.7209229809791082 }, { "epoch": 1.0854282062207072, "grad_norm": 63.17554418013749, "learning_rate": 3.8770872155021253e-07, "loss": 0.1439, "step": 5095, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8666666666666667, "success_rate.epoch.env.math": 0.9385964912280702, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7348242811501597, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8757040710638399, "success_rate.epoch.global": 0.8505025125628141, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986313868613139, "tokens_p.mean_in_band": 0.6607142857142857, "tokens_rate.above_band": 0.9670588235294117, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03294117647058824 }, { "epoch": 1.0864933958244567, "grad_norm": 65.12427258618388, "learning_rate": 3.8768408381097155e-07, "loss": 0.3441, "step": 5100, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8691588785046729, "success_rate.epoch.env.math": 0.9388646288209607, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7398119122257053, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.876408432928062, "success_rate.epoch.global": 0.8523573200992556, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962818696883853, "tokens_p.mean_in_band": 0.7907366071428571, "tokens_rate.above_band": 0.9805555555555555, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019444444444444445 }, { "epoch": 1.0875585854282062, "grad_norm": 230.26982974330005, "learning_rate": 3.876594258957693e-07, "loss": 0.3642, "step": 5105, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8691588785046729, "success_rate.epoch.env.math": 0.9399141630901288, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7376543209876543, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8763077004763453, "success_rate.epoch.global": 0.8517156862745098, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975124378109452, "tokens_p.mean_in_band": 0.5914306640625, "tokens_rate.above_band": 0.9617224880382775, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03827751196172249 }, { "epoch": 1.0886237750319556, "grad_norm": 67.96402510385181, "learning_rate": 3.876347478225762e-07, "loss": 0.2941, "step": 5110, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8703703703703703, "success_rate.epoch.env.math": 0.9409282700421941, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7384615384615385, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8765834110483133, "success_rate.epoch.global": 0.8535108958837773, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983898527865405, "tokens_p.mean_in_band": 0.734375, "tokens_rate.above_band": 0.9968553459119497, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0031446540880503146 }, { "epoch": 1.089688964635705, "grad_norm": 135.17207102464585, "learning_rate": 3.876100496093775e-07, "loss": 0.1945, "step": 5115, "success_rate.epoch.env.abd": 0.9777777777777777, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8703703703703703, "success_rate.epoch.env.math": 0.9416666666666667, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7370030581039755, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.876680412689587, "success_rate.epoch.global": 0.854066985645933, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9999553252323088, "tokens_p.mean_in_band": 0.5703125, "tokens_rate.above_band": 0.9985724482512491, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0014275517487508922 }, { "epoch": 1.0907541542394545, "grad_norm": 160.17105370646595, "learning_rate": 3.8758533127417297e-07, "loss": 0.4869, "step": 5120, "success_rate.epoch.env.abd": 0.9782608695652174, "success_rate.epoch.env.agentgym:alfworld": 0.96, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.8703703703703703, "success_rate.epoch.env.math": 0.941908713692946, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7409638554216867, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8735809079920007, "success_rate.epoch.global": 0.8546099290780141, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993977516059958, "tokens_p.mean_in_band": 0.69453125, "tokens_rate.above_band": 0.9946751863684771, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005324813631522897 }, { "epoch": 1.091819343843204, "grad_norm": 206.12375796428358, "learning_rate": 3.8756059283497733e-07, "loss": 0.3018, "step": 5125, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.96, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.8715596330275229, "success_rate.epoch.env.math": 0.9385245901639344, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7425149700598802, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8737706942060314, "success_rate.epoch.global": 0.8551401869158879, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976498983739838, "tokens_p.mean_in_band": 0.625, "tokens_rate.above_band": 0.9959514170040485, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004048582995951417 }, { "epoch": 1.0928845334469535, "grad_norm": 53.4426212461758, "learning_rate": 3.875358343098197e-07, "loss": 0.1871, "step": 5130, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.8738738738738738, "success_rate.epoch.env.math": 0.94, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7432835820895523, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8743249418651723, "success_rate.epoch.global": 0.8568129330254042, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955778301886793, "tokens_p.mean_in_band": 0.802734375, "tokens_rate.above_band": 0.9814814814814815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018518518518518517 }, { "epoch": 1.093949723050703, "grad_norm": 110.0774637185527, "learning_rate": 3.875110557167438e-07, "loss": 0.2661, "step": 5135, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8738738738738738, "success_rate.epoch.env.math": 0.9407114624505929, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7419354838709677, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8743631642910306, "success_rate.epoch.global": 0.8561643835616438, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942103794642857, "tokens_p.mean_in_band": 0.614990234375, "tokens_rate.above_band": 0.9655172413793104, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034482758620689655 }, { "epoch": 1.0950149126544524, "grad_norm": 238.6272099337383, "learning_rate": 3.8748625707380824e-07, "loss": 0.2169, "step": 5140, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8738738738738738, "success_rate.epoch.env.math": 0.94140625, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7435158501440923, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8745699964567152, "success_rate.epoch.global": 0.8566591422121896, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955658783783784, "tokens_p.mean_in_band": 0.23177083333333334, "tokens_rate.above_band": 0.9801324503311258, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019867549668874173 }, { "epoch": 1.096080102258202, "grad_norm": 110.67437073230069, "learning_rate": 3.8746143839908606e-07, "loss": 0.3265, "step": 5145, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.8738738738738738, "success_rate.epoch.env.math": 0.9420849420849421, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7435897435897436, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8729597889172358, "success_rate.epoch.global": 0.8560267857142857, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9938991323210412, "tokens_p.mean_in_band": 0.640126329787234, "tokens_rate.above_band": 0.90748031496063, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09251968503937008 }, { "epoch": 1.0971452918619513, "grad_norm": 182.92325646847095, "learning_rate": 3.874365997106649e-07, "loss": 0.5109, "step": 5150, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.8738738738738738, "success_rate.epoch.env.math": 0.9425287356321839, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7478991596638656, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8733918988828143, "success_rate.epoch.global": 0.8576158940397351, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9954166666666666, "tokens_p.mean_in_band": 0.80859375, "tokens_rate.above_band": 0.9782608695652174, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021739130434782608 }, { "epoch": 1.098210481465701, "grad_norm": 124.94864449011877, "learning_rate": 3.874117410266471e-07, "loss": 0.389, "step": 5155, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8672566371681416, "success_rate.epoch.env.math": 0.9431818181818182, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.75, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8731724408412103, "success_rate.epoch.global": 0.8580786026200873, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973897345823576, "tokens_p.mean_in_band": 0.7161458333333334, "tokens_rate.above_band": 0.9953379953379954, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004662004662004662 }, { "epoch": 1.0992756710694505, "grad_norm": 109.79976932617627, "learning_rate": 3.8738686236514937e-07, "loss": 0.2852, "step": 5160, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8672566371681416, "success_rate.epoch.env.math": 0.943609022556391, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.75, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8732112776025349, "success_rate.epoch.global": 0.857451403887689, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9884020618556701, "tokens_p.mean_in_band": 0.4296875, "tokens_rate.above_band": 0.941747572815534, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05825242718446602 }, { "epoch": 1.1003408606732, "grad_norm": 84.99864019904389, "learning_rate": 3.873619637443031e-07, "loss": 0.2377, "step": 5165, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8695652173913043, "success_rate.epoch.env.math": 0.9438202247191011, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7506702412868632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8706874269407492, "success_rate.epoch.global": 0.8568376068376068, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992421540656206, "tokens_p.mean_in_band": 0.390625, "tokens_rate.above_band": 0.9915134370579916, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008486562942008486 }, { "epoch": 1.1014060502769494, "grad_norm": 60.99410794911759, "learning_rate": 3.873370451822544e-07, "loss": 0.2666, "step": 5170, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9361702127659575, "success_rate.epoch.env.logic": 0.8695652173913043, "success_rate.epoch.env.math": 0.9446494464944649, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7506631299734748, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708883100800473, "success_rate.epoch.global": 0.857293868921776, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936625874125874, "tokens_p.mean_in_band": 0.6428571428571429, "tokens_rate.above_band": 0.9761092150170648, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023890784982935155 }, { "epoch": 1.1024712398806988, "grad_norm": 40.62948199770087, "learning_rate": 3.8731210669716355e-07, "loss": 0.2366, "step": 5175, "success_rate.epoch.env.abd": 0.9791666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.865546218487395, "success_rate.epoch.env.math": 0.9448529411764706, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.75, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8706423478482302, "success_rate.epoch.global": 0.856694560669456, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8833333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994550511124474, "tokens_p.mean_in_band": 0.5189393939393939, "tokens_rate.above_band": 0.9805424528301887, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01945754716981132 }, { "epoch": 1.1035364294844483, "grad_norm": 40.615117543713055, "learning_rate": 3.872871483072056e-07, "loss": 0.2951, "step": 5180, "success_rate.epoch.env.abd": 0.9791666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8666666666666667, "success_rate.epoch.env.math": 0.9458483754512635, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7519582245430809, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712082246329168, "success_rate.epoch.global": 0.8581780538302277, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980769230769231, "tokens_p.mean_in_band": 0.7958096590909091, "tokens_rate.above_band": 0.9672619047619048, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03273809523809524 }, { "epoch": 1.1046016190881978, "grad_norm": 112.40872957289658, "learning_rate": 3.872621700305701e-07, "loss": 0.2109, "step": 5185, "success_rate.epoch.env.abd": 0.98, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8688524590163934, "success_rate.epoch.env.math": 0.9464285714285714, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7532467532467533, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716525746660114, "success_rate.epoch.global": 0.8596311475409836, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970588235294118, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9941520467836257, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005847953216374269 }, { "epoch": 1.1056668086919472, "grad_norm": 77.06821168483752, "learning_rate": 3.8723717188546095e-07, "loss": 0.274, "step": 5190, "success_rate.epoch.env.abd": 0.98, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.8709677419354839, "success_rate.epoch.env.math": 0.9469964664310954, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7506426735218509, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717757208842655, "success_rate.epoch.global": 0.859026369168357, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9956995412844036, "tokens_p.mean_in_band": 0.4481534090909091, "tokens_rate.above_band": 0.9083333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09166666666666666 }, { "epoch": 1.1067319982956967, "grad_norm": 201.13386995976776, "learning_rate": 3.872121538900967e-07, "loss": 0.4417, "step": 5195, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.873015873015873, "success_rate.epoch.env.math": 0.9471830985915493, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.751269035532995, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8721057532498308, "success_rate.epoch.global": 0.8594377510040161, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977678571428571, "tokens_p.mean_in_band": 0.740234375, "tokens_rate.above_band": 0.9680851063829787, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031914893617021274 }, { "epoch": 1.1077971878994461, "grad_norm": 84.52786372488357, "learning_rate": 3.871871160627102e-07, "loss": 0.3288, "step": 5200, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.8661417322834646, "success_rate.epoch.env.math": 0.9477351916376306, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7525, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716429275026196, "success_rate.epoch.global": 0.8588469184890656, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9920634920634921, "tokens_p.mean_in_band": 0.5963541666666666, "tokens_rate.above_band": 0.9130434782608695, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08695652173913043 }, { "epoch": 1.1088623775031956, "grad_norm": 304.79828297604735, "learning_rate": 3.8716205842154896e-07, "loss": 0.3263, "step": 5205, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.9375, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9423076923076923, "success_rate.epoch.env.logic": 0.8682170542635659, "success_rate.epoch.env.math": 0.9480968858131488, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7531172069825436, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8724249760729862, "success_rate.epoch.global": 0.860236220472441, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990207765667575, "tokens_p.mean_in_band": 0.5625, "tokens_rate.above_band": 0.9993192648059904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0006807351940095302 }, { "epoch": 1.109927567106945, "grad_norm": 89.70948254159615, "learning_rate": 3.8713698098487466e-07, "loss": 0.281, "step": 5210, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9375, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8692307692307693, "success_rate.epoch.env.math": 0.9484536082474226, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7506172839506173, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8725495316374197, "success_rate.epoch.global": 0.8596491228070176, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984065686862628, "tokens_p.mean_in_band": 0.6142578125, "tokens_rate.above_band": 0.9928528886241811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00714711137581894 }, { "epoch": 1.1109927567106945, "grad_norm": 158.5209917720987, "learning_rate": 3.8711188377096365e-07, "loss": 0.2265, "step": 5215, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9393939393939394, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9454545454545454, "success_rate.epoch.env.logic": 0.8721804511278195, "success_rate.epoch.env.math": 0.9486301369863014, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.75, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8730416195546336, "success_rate.epoch.global": 0.86003861003861, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997344944774851, "tokens_p.mean_in_band": 0.7356770833333334, "tokens_rate.above_band": 0.997457627118644, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002542372881355932 }, { "epoch": 1.112057946314444, "grad_norm": 25.825288991355624, "learning_rate": 3.8708676679810666e-07, "loss": 0.3018, "step": 5220, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8731343283582089, "success_rate.epoch.env.math": 0.9489795918367347, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7518248175182481, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8737293812852063, "success_rate.epoch.global": 0.861376673040153, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974398569570871, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.1131231359181935, "grad_norm": 45.0277663430148, "learning_rate": 3.870616300846086e-07, "loss": 0.2867, "step": 5225, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8740740740740741, "success_rate.epoch.env.math": 0.9491525423728814, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7541766109785203, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8740443348954141, "success_rate.epoch.global": 0.8617424242424242, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915865384615384, "tokens_p.mean_in_band": 0.24283854166666666, "tokens_rate.above_band": 0.9719626168224299, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028037383177570093 }, { "epoch": 1.114188325521943, "grad_norm": 57.33559610050672, "learning_rate": 3.8703647364878893e-07, "loss": 0.3743, "step": 5230, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8740740740740741, "success_rate.epoch.env.math": 0.9491525423728814, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7494145199063232, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8736737908294401, "success_rate.epoch.global": 0.8592870544090057, "success_rate.window.env.abd": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9933176100628931, "tokens_p.mean_in_band": 0.50633544921875, "tokens_rate.above_band": 0.8412698412698413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15873015873015872 }, { "epoch": 1.1152535151256924, "grad_norm": 39.400872988123666, "learning_rate": 3.870112975089817e-07, "loss": 0.3044, "step": 5235, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9491525423728814, "success_rate.epoch.env.logic": 0.8759124087591241, "success_rate.epoch.env.math": 0.9494949494949495, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7505827505827506, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8743701767399089, "success_rate.epoch.global": 0.8605947955390335, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987549800796812, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9992038216560509, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0007961783439490446 }, { "epoch": 1.1163187047294418, "grad_norm": 67.95782985940595, "learning_rate": 3.869861016835349e-07, "loss": 0.3773, "step": 5240, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8776978417266187, "success_rate.epoch.env.math": 0.9504950495049505, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7505827505827506, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8747004486130556, "success_rate.epoch.global": 0.861878453038674, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981773997569866, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9987864077669902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012135922330097086 }, { "epoch": 1.1173838943331913, "grad_norm": 850.734297224957, "learning_rate": 3.8696088619081106e-07, "loss": 0.4694, "step": 5245, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8811188811188811, "success_rate.epoch.env.math": 0.9506578947368421, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7517401392111369, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8729964872283005, "success_rate.epoch.global": 0.8622262773722628, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971382783882784, "tokens_p.mean_in_band": 0.5446428571428571, "tokens_rate.above_band": 0.975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025 }, { "epoch": 1.1184490839369408, "grad_norm": 93.74605823030838, "learning_rate": 3.8693565104918715e-07, "loss": 0.3742, "step": 5250, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8827586206896552, "success_rate.epoch.env.math": 0.9514563106796117, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7534562211981567, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8733741451828969, "success_rate.epoch.global": 0.8634719710669078, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.989010989010989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01098901098901099 }, { "epoch": 1.1195142735406902, "grad_norm": 167.60115870344111, "learning_rate": 3.8691039627705433e-07, "loss": 0.3261, "step": 5255, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8843537414965986, "success_rate.epoch.env.math": 0.9516129032258065, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7557077625570776, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8738685715819883, "success_rate.epoch.global": 0.8646953405017921, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986538461538461, "tokens_p.mean_in_band": 0.67109375, "tokens_rate.above_band": 0.9923664122137404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007633587786259542 }, { "epoch": 1.1205794631444397, "grad_norm": 75.72491858043374, "learning_rate": 3.868851218928181e-07, "loss": 0.3269, "step": 5260, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.9459459459459459, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.88, "success_rate.epoch.env.math": 0.9517684887459807, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7573696145124716, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8737744986256676, "success_rate.epoch.global": 0.8650088809946714, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979771205357143, "tokens_p.mean_in_band": 0.6905381944444444, "tokens_rate.above_band": 0.9803063457330415, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019693654266958426 }, { "epoch": 1.1216446527481891, "grad_norm": 110.57320370533763, "learning_rate": 3.8685982791489825e-07, "loss": 0.2583, "step": 5265, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8807947019867549, "success_rate.epoch.env.math": 0.9525316455696202, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7584650112866818, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8741450195976032, "success_rate.epoch.global": 0.8661971830985915, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961300309597523, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9938461538461538, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006153846153846154 }, { "epoch": 1.1227098423519386, "grad_norm": 154.89165743127634, "learning_rate": 3.868345143617288e-07, "loss": 0.362, "step": 5270, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.881578947368421, "success_rate.epoch.env.math": 0.9526813880126183, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7583892617449665, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8743979721968707, "success_rate.epoch.global": 0.8664921465968587, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991871387283237, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9885714285714285, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011428571428571429 }, { "epoch": 1.123775031955688, "grad_norm": 68.2933313424731, "learning_rate": 3.868091812517581e-07, "loss": 0.171, "step": 5275, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8838709677419355, "success_rate.epoch.env.math": 0.9532710280373832, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7577777777777778, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8746043518724244, "success_rate.epoch.global": 0.8667820069204152, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994921875, "tokens_p.mean_in_band": 0.6536458333333334, "tokens_rate.above_band": 0.9302325581395349, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06976744186046512 }, { "epoch": 1.1248402215594375, "grad_norm": 242.5255759945865, "learning_rate": 3.867838286034488e-07, "loss": 0.1494, "step": 5280, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8846153846153846, "success_rate.epoch.env.math": 0.9507692307692308, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7593818984547461, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.874590419170812, "success_rate.epoch.global": 0.8670668953687821, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000490837696335, "tokens_p.mean_in_band": 0.76123046875, "tokens_rate.above_band": 0.9958289885297185, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004171011470281543 }, { "epoch": 1.125905411163187, "grad_norm": 310.19556807866786, "learning_rate": 3.8675845643527765e-07, "loss": 0.3814, "step": 5285, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.8853503184713376, "success_rate.epoch.env.math": 0.9512195121951219, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7614879649890591, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8725047360693384, "success_rate.epoch.global": 0.8673469387755102, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989908854166667, "tokens_p.mean_below_band": 4.602043190971017e-10, "tokens_p.mean_in_band": 0.79931640625, "tokens_rate.above_band": 0.9907120743034056, "tokens_rate.below_band": 0.0010319917440660474, "tokens_rate.in_band": 0.008255933952528379 }, { "epoch": 1.1269706007669364, "grad_norm": 71.57284706372906, "learning_rate": 3.8673306476573575e-07, "loss": 0.315, "step": 5290, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8881987577639752, "success_rate.epoch.env.math": 0.9518072289156626, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7620087336244541, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8729342791424038, "success_rate.epoch.global": 0.8684654300168634, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986293859649122, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.128035790370686, "grad_norm": 82.1475035259727, "learning_rate": 3.867076536133284e-07, "loss": 0.2763, "step": 5295, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8881987577639752, "success_rate.epoch.env.math": 0.9520958083832335, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7660944206008584, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8733319397282198, "success_rate.epoch.global": 0.8695652173913043, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9943693693693694, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.9736842105263158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02631578947368421 }, { "epoch": 1.1291009799744354, "grad_norm": 89.6272378455988, "learning_rate": 3.86682222996575e-07, "loss": 0.3422, "step": 5300, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8902439024390244, "success_rate.epoch.env.math": 0.9525222551928784, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7633262260127932, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8733298143591601, "success_rate.epoch.global": 0.8689883913764511, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946732954545454, "tokens_p.mean_in_band": 0.4893465909090909, "tokens_rate.above_band": 0.8, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2 }, { "epoch": 1.1301661695781848, "grad_norm": 95.10152157247057, "learning_rate": 3.8665677293400924e-07, "loss": 0.2529, "step": 5305, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8909090909090909, "success_rate.epoch.env.math": 0.9529411764705882, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7648305084745762, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8736800749572095, "success_rate.epoch.global": 0.8700657894736842, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969512195121951, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.1312313591819343, "grad_norm": 27.945044526773536, "learning_rate": 3.866313034441789e-07, "loss": 0.3504, "step": 5310, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.891566265060241, "success_rate.epoch.env.math": 0.9534883720930233, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7668067226890757, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.873991765853673, "success_rate.epoch.global": 0.8711256117455138, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.994410569105691, "tokens_p.mean_in_band": 0.8505859375, "tokens_rate.above_band": 0.968503937007874, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031496062992125984 }, { "epoch": 1.1322965487856838, "grad_norm": 118.49802409828332, "learning_rate": 3.86605814545646e-07, "loss": 0.2682, "step": 5315, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8928571428571429, "success_rate.epoch.env.math": 0.9541547277936963, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7677824267782427, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8742583965433769, "success_rate.epoch.global": 0.872168284789644, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988799283154122, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.9928825622775801, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0071174377224199285 }, { "epoch": 1.1333617383894334, "grad_norm": 77.98706334718261, "learning_rate": 3.8658030625698663e-07, "loss": 0.385, "step": 5320, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.95, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.893491124260355, "success_rate.epoch.env.math": 0.9541547277936963, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7654320987654321, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8742189151499634, "success_rate.epoch.global": 0.8707865168539326, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9960466867469879, "tokens_p.mean_in_band": 0.59765625, "tokens_rate.above_band": 0.9485714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05142857142857143 }, { "epoch": 1.1344269279931827, "grad_norm": 153.89347713916143, "learning_rate": 3.8655477859679114e-07, "loss": 0.1973, "step": 5325, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9538461538461539, "success_rate.epoch.env.logic": 0.8947368421052632, "success_rate.epoch.env.math": 0.9544159544159544, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.764344262295082, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8729156489105798, "success_rate.epoch.global": 0.8702229299363057, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974908759124088, "tokens_p.mean_in_band": 0.5177734375, "tokens_rate.above_band": 0.9762470308788599, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023752969121140142 }, { "epoch": 1.1354921175969324, "grad_norm": 233.52559795034185, "learning_rate": 3.865292315836638e-07, "loss": 0.4401, "step": 5330, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9538461538461539, "success_rate.epoch.env.logic": 0.8953488372093024, "success_rate.epoch.env.math": 0.9550561797752809, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7621951219512195, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8728341107396257, "success_rate.epoch.global": 0.8696682464454977, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9917091836734694, "tokens_p.mean_in_band": 0.5876736111111112, "tokens_rate.above_band": 0.9158878504672897, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08411214953271028 }, { "epoch": 1.1365573072006816, "grad_norm": 72.88675988213836, "learning_rate": 3.8650366523622307e-07, "loss": 0.2221, "step": 5335, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8977272727272727, "success_rate.epoch.env.math": 0.9550561797752809, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7620967741935484, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.873104964235953, "success_rate.epoch.global": 0.8699059561128527, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990768094534712, "tokens_p.mean_in_band": 0.63671875, "tokens_rate.above_band": 0.9941262848751835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005873715124816446 }, { "epoch": 1.1376224968044313, "grad_norm": 72.2643236243864, "learning_rate": 3.8647807957310167e-07, "loss": 0.3064, "step": 5340, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8983050847457628, "success_rate.epoch.env.math": 0.9555555555555556, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.762, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8732159458923712, "success_rate.epoch.global": 0.8701399688958009, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965277777777778, "tokens_p.mean_in_band": 0.5963541666666666, "tokens_rate.above_band": 0.972972972972973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02702702702702703 }, { "epoch": 1.1386876864081807, "grad_norm": 115.95460668351762, "learning_rate": 3.8645247461294607e-07, "loss": 0.3364, "step": 5345, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.898876404494382, "success_rate.epoch.env.math": 0.9558011049723757, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7628458498023716, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8734287772577687, "success_rate.epoch.global": 0.8703703703703703, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.991042345276873, "tokens_p.mean_in_band": 0.7278645833333334, "tokens_rate.above_band": 0.9715189873417721, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028481012658227847 }, { "epoch": 1.1397528760119302, "grad_norm": 15.346054358634856, "learning_rate": 3.8642685037441705e-07, "loss": 0.414, "step": 5350, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9564032697547684, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7637795275590551, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8737304054749373, "success_rate.epoch.global": 0.8713629402756509, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998342803030303, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.1408180656156797, "grad_norm": 101.31493930051695, "learning_rate": 3.864012068761895e-07, "loss": 0.2962, "step": 5355, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9010989010989011, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.763671875, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8739540728984022, "success_rate.epoch.global": 0.871580547112462, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990873247663551, "tokens_p.mean_in_band": 0.578125, "tokens_rate.above_band": 0.9839080459770115, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016091954022988506 }, { "epoch": 1.1418832552194291, "grad_norm": 104.0451129278403, "learning_rate": 3.86375544136952e-07, "loss": 0.3797, "step": 5360, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9571428571428572, "success_rate.epoch.env.logic": 0.9010989010989011, "success_rate.epoch.env.math": 0.956989247311828, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7620889748549323, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8739091389028338, "success_rate.epoch.global": 0.8710407239819005, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967369477911646, "tokens_p.mean_below_band": 3.655441105365753e-08, "tokens_p.mean_in_band": 0.40625, "tokens_rate.above_band": 0.9467680608365019, "tokens_rate.below_band": 0.0038022813688212928, "tokens_rate.in_band": 0.049429657794676805 }, { "epoch": 1.1429484448231786, "grad_norm": 80.17257523697818, "learning_rate": 3.863498621754075e-07, "loss": 0.2845, "step": 5365, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.9016393442622951, "success_rate.epoch.env.math": 0.9574468085106383, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7634615384615384, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8742007107148605, "success_rate.epoch.global": 0.8720059880239521, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979395604395604, "tokens_p.mean_in_band": 0.720703125, "tokens_rate.above_band": 0.978494623655914, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021505376344086023 }, { "epoch": 1.144013634426928, "grad_norm": 50.97903601685837, "learning_rate": 3.8632416101027286e-07, "loss": 0.2743, "step": 5370, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.9021739130434783, "success_rate.epoch.env.math": 0.9578947368421052, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7609942638623327, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8741270731099607, "success_rate.epoch.global": 0.8714710252600297, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981155778894473, "tokens_p.mean_in_band": 0.325, "tokens_rate.above_band": 0.9875930521091811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01240694789081886 }, { "epoch": 1.1450788240306775, "grad_norm": 150.86810287871177, "learning_rate": 3.8629844066027877e-07, "loss": 0.2703, "step": 5375, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8978494623655914, "success_rate.epoch.env.math": 0.95822454308094, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7623574144486692, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.873928358468675, "success_rate.epoch.global": 0.8716814159292036, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963884430176565, "tokens_p.mean_in_band": 0.46875, "tokens_rate.above_band": 0.978021978021978, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02197802197802198 }, { "epoch": 1.146144013634427, "grad_norm": 155.58822346589005, "learning_rate": 3.862727011441701e-07, "loss": 0.2506, "step": 5380, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8983957219251337, "success_rate.epoch.env.math": 0.9588688946015425, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7632575757575758, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8741767037441367, "success_rate.epoch.global": 0.8726207906295754, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957107843137255, "tokens_p.mean_in_band": 0.8151041666666666, "tokens_rate.above_band": 0.9855072463768116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014492753623188406 }, { "epoch": 1.1472092032381764, "grad_norm": 170.61846308388598, "learning_rate": 3.8624694248070574e-07, "loss": 0.3202, "step": 5385, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.898936170212766, "success_rate.epoch.env.math": 0.9592875318066157, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7654784240150094, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8744657886305128, "success_rate.epoch.global": 0.873546511627907, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955658783783784, "tokens_p.mean_in_band": 0.67578125, "tokens_rate.above_band": 0.961038961038961, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03896103896103896 }, { "epoch": 1.148274392841926, "grad_norm": 475.247067033273, "learning_rate": 3.8622116468865823e-07, "loss": 0.3574, "step": 5390, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8994708994708994, "success_rate.epoch.env.math": 0.9593908629441624, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7680890538033395, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8836188832792388, "success_rate.epoch.global": 0.8744588744588745, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974724264705882, "tokens_p.mean_in_band": 0.839453125, "tokens_rate.above_band": 0.9819494584837545, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018050541516245487 }, { "epoch": 1.1493395824456754, "grad_norm": 663.6783173874302, "learning_rate": 3.8619536778681434e-07, "loss": 0.4983, "step": 5395, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.9010416666666666, "success_rate.epoch.env.math": 0.9596977329974811, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7679558011049724, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8837774636929404, "success_rate.epoch.global": 0.8746418338108882, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993122009569378, "tokens_p.mean_in_band": 0.7061941964285714, "tokens_rate.above_band": 0.9372197309417041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06278026905829596 }, { "epoch": 1.1504047720494248, "grad_norm": 231.91896777017487, "learning_rate": 3.861695517939747e-07, "loss": 0.3775, "step": 5400, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.9015544041450777, "success_rate.epoch.env.math": 0.9601990049751243, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7692307692307693, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8840409849365234, "success_rate.epoch.global": 0.8755334281650071, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999274661508704, "tokens_p.mean_in_band": 0.7213541666666666, "tokens_rate.above_band": 0.988527724665392, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011472275334608031 }, { "epoch": 1.1514699616531743, "grad_norm": 51.736370475595805, "learning_rate": 3.861437167289537e-07, "loss": 0.2492, "step": 5405, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.9025641025641026, "success_rate.epoch.env.math": 0.9603960396039604, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7695099818511797, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8821558688861643, "success_rate.epoch.global": 0.875, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9926215277777778, "tokens_p.mean_in_band": 0.5592041015625, "tokens_rate.above_band": 0.8181818181818182, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18181818181818182 }, { "epoch": 1.1525351512569237, "grad_norm": 88.13301482797574, "learning_rate": 3.8611786261057983e-07, "loss": 0.3243, "step": 5410, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.9035532994923858, "success_rate.epoch.env.math": 0.9605911330049262, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7719928186714542, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8824892440815755, "success_rate.epoch.global": 0.8758765778401122, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9914010067114094, "tokens_p.mean_in_band": 0.771484375, "tokens_rate.above_band": 0.9738562091503268, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026143790849673203 }, { "epoch": 1.1536003408606732, "grad_norm": 116.91828344381891, "learning_rate": 3.860919894576954e-07, "loss": 0.3771, "step": 5415, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.905, "success_rate.epoch.env.math": 0.960880195599022, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7736185383244206, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8827948334229098, "success_rate.epoch.global": 0.8767409470752089, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949127906976745, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.1546655304644227, "grad_norm": 38.94428124621091, "learning_rate": 3.8606609728915655e-07, "loss": 0.3552, "step": 5420, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9583333333333334, "success_rate.epoch.env.logic": 0.9054726368159204, "success_rate.epoch.env.math": 0.9609756097560975, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7742504409171076, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8829760922741143, "success_rate.epoch.global": 0.8769017980636238, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969858156028368, "tokens_p.mean_in_band": 0.5529513888888888, "tokens_rate.above_band": 0.9873949579831933, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012605042016806723 }, { "epoch": 1.1557307200681721, "grad_norm": 25.593705439809174, "learning_rate": 3.860401861238333e-07, "loss": 0.2794, "step": 5425, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9583333333333334, "success_rate.epoch.env.logic": 0.9064039408866995, "success_rate.epoch.env.math": 0.9610705596107056, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.775438596491228, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8816962684892178, "success_rate.epoch.global": 0.8770604395604396, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990384615384615, "tokens_p.mean_in_band": 0.6838727678571429, "tokens_rate.above_band": 0.9653465346534653, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034653465346534656 }, { "epoch": 1.1567959096719216, "grad_norm": 76.90661965573406, "learning_rate": 3.860142559806096e-07, "loss": 0.3634, "step": 5430, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9565217391304348, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.958904109589041, "success_rate.epoch.env.logic": 0.9024390243902439, "success_rate.epoch.env.math": 0.9611650485436893, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7766143106457243, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8817789439332308, "success_rate.epoch.global": 0.8772169167803547, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981264551804424, "tokens_p.mean_in_band": 0.6155894886363636, "tokens_rate.above_band": 0.993637941006362, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006362058993637941 }, { "epoch": 1.157861099275671, "grad_norm": 78.61093805233676, "learning_rate": 3.8598830687838304e-07, "loss": 0.3668, "step": 5435, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9565217391304348, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.958904109589041, "success_rate.epoch.env.logic": 0.9033816425120773, "success_rate.epoch.env.math": 0.9612590799031477, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7772020725388601, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8819266176945422, "success_rate.epoch.global": 0.8773712737127372, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968932748538012, "tokens_p.mean_in_band": 0.6066576086956522, "tokens_rate.above_band": 0.9674681753889675, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03253182461103253 }, { "epoch": 1.1589262888794205, "grad_norm": 187.5140818528434, "learning_rate": 3.859623388360652e-07, "loss": 0.4417, "step": 5440, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9565217391304348, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.96, "success_rate.epoch.env.logic": 0.9038461538461539, "success_rate.epoch.env.math": 0.9615384615384616, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7753001715265866, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8819209706370945, "success_rate.epoch.global": 0.8768506056527591, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0004460745440127, "tokens_p.mean_in_band": 0.5065104166666666, "tokens_rate.above_band": 0.99057344854674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009426551453260016 }, { "epoch": 1.15999147848317, "grad_norm": 318.484063452464, "learning_rate": 3.8593635187258134e-07, "loss": 0.3525, "step": 5445, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9605263157894737, "success_rate.epoch.env.logic": 0.9056603773584906, "success_rate.epoch.env.math": 0.9616306954436451, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7747440273037542, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8802414345116113, "success_rate.epoch.global": 0.8763368983957219, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994612068965517, "tokens_p.mean_in_band": 0.43136160714285715, "tokens_rate.above_band": 0.9914529914529915, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008547008547008548 }, { "epoch": 1.1610566680869194, "grad_norm": 76.04912135143246, "learning_rate": 3.8591034600687063e-07, "loss": 0.1966, "step": 5450, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.9069767441860465, "success_rate.epoch.env.math": 0.9619047619047619, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7755102040816326, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8805476850402968, "success_rate.epoch.global": 0.8771580345285525, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995005707762558, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.162121857690669, "grad_norm": 17.680857182509204, "learning_rate": 3.8588432125788597e-07, "loss": 0.3327, "step": 5455, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.9078341013824884, "success_rate.epoch.env.math": 0.9622641509433962, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7753378378378378, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8806426286758678, "success_rate.epoch.global": 0.8773087071240105, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945987654320988, "tokens_p.mean_in_band": 0.6701388888888888, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 1.1631870472944184, "grad_norm": 32.80299912299739, "learning_rate": 3.85858277644594e-07, "loss": 0.2905, "step": 5460, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.9078341013824884, "success_rate.epoch.env.math": 0.9604651162790697, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7764705882352941, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8806323946385192, "success_rate.epoch.global": 0.8774574049803407, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958791208791209, "tokens_p.mean_in_band": 0.77734375, "tokens_rate.above_band": 0.978494623655914, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021505376344086023 }, { "epoch": 1.1642522368981678, "grad_norm": 146.2031288593941, "learning_rate": 3.858322151859751e-07, "loss": 0.2195, "step": 5465, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.9036697247706422, "success_rate.epoch.env.math": 0.960919540229885, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7775919732441472, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8804413298444334, "success_rate.epoch.global": 0.8776041666666666, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972826086956522, "tokens_p.mean_in_band": 0.7534722222222222, "tokens_rate.above_band": 0.9745762711864406, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025423728813559324 }, { "epoch": 1.1653174265019173, "grad_norm": 327.0093868392864, "learning_rate": 3.8580613390102334e-07, "loss": 0.4452, "step": 5470, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.9049773755656109, "success_rate.epoch.env.math": 0.9612756264236902, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7783333333333333, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8807080242357223, "success_rate.epoch.global": 0.8783958602846055, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976937269372693, "tokens_p.mean_in_band": 0.72265625, "tokens_rate.above_band": 0.9963235294117647, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003676470588235294 }, { "epoch": 1.1663826161056667, "grad_norm": 110.09016909146698, "learning_rate": 3.857800338087467e-07, "loss": 0.3896, "step": 5475, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.9054054054054054, "success_rate.epoch.env.math": 0.9614512471655329, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.7788778877887789, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8808301899803083, "success_rate.epoch.global": 0.87853470437018, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99658203125, "tokens_p.mean_in_band": 0.6439732142857143, "tokens_rate.above_band": 0.9481481481481482, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05185185185185185 }, { "epoch": 1.1674478057094162, "grad_norm": 60.61922062887736, "learning_rate": 3.8575391492816667e-07, "loss": 0.4347, "step": 5480, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.905829596412556, "success_rate.epoch.env.math": 0.9617977528089887, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.780327868852459, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8796546593041966, "success_rate.epoch.global": 0.8786717752234994, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984809027777778, "tokens_p.mean_in_band": 0.7020833333333333, "tokens_rate.above_band": 0.9056603773584906, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09433962264150944 }, { "epoch": 1.1685129953131657, "grad_norm": 141.97409396308447, "learning_rate": 3.8572777727831855e-07, "loss": 0.2526, "step": 5485, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9625, "success_rate.epoch.env.logic": 0.9070796460176991, "success_rate.epoch.env.math": 0.9617977528089887, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7804878048780488, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8798432891157364, "success_rate.epoch.global": 0.8788071065989848, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988650121065376, "tokens_p.mean_in_band": 0.601318359375, "tokens_rate.above_band": 0.9904076738609112, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009592326139088728 }, { "epoch": 1.1695781849169151, "grad_norm": 82.10796465010836, "learning_rate": 3.8570162087825116e-07, "loss": 0.2449, "step": 5490, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9625, "success_rate.epoch.env.logic": 0.9074889867841409, "success_rate.epoch.env.math": 0.9621380846325167, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.778675282714055, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.879763495211141, "success_rate.epoch.global": 0.8783102143757882, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946646341463414, "tokens_p.mean_in_band": 0.587890625, "tokens_rate.above_band": 0.9461538461538461, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05384615384615385 }, { "epoch": 1.1706433745206646, "grad_norm": 198.15483683805763, "learning_rate": 3.856754457470272e-07, "loss": 0.4647, "step": 5495, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9361702127659575, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9082969432314411, "success_rate.epoch.env.math": 0.9623059866962306, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7797427652733119, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8800077210487239, "success_rate.epoch.global": 0.8790726817042607, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990712412587412, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9913344887348353, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008665511265164644 }, { "epoch": 1.171708564124414, "grad_norm": 72.65972204623101, "learning_rate": 3.856492519037229e-07, "loss": 0.3026, "step": 5500, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9375, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.908695652173913, "success_rate.epoch.env.math": 0.9623059866962306, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.78060413354531, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8802890769535933, "success_rate.epoch.global": 0.8792029887920298, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986817617866005, "tokens_p.mean_in_band": 0.65703125, "tokens_rate.above_band": 0.9527186761229315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04728132387706856 }, { "epoch": 1.1727737537281637, "grad_norm": 106.83478750642901, "learning_rate": 3.8562303936742817e-07, "loss": 0.3229, "step": 5505, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9375, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9626373626373627, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7823343848580442, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8805124300600354, "success_rate.epoch.global": 0.879950495049505, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968251992031872, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9980119284294234, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0019880715705765406 }, { "epoch": 1.173838943331913, "grad_norm": 333.1304944656572, "learning_rate": 3.8559680815724645e-07, "loss": 0.2103, "step": 5510, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9375, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9102564102564102, "success_rate.epoch.env.math": 0.962800875273523, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7824726134585289, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8806458157329579, "success_rate.epoch.global": 0.8800738007380073, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949324324324325, "tokens_p.mean_in_band": 0.6569010416666666, "tokens_rate.above_band": 0.9487179487179487, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05128205128205128 }, { "epoch": 1.1749041329356626, "grad_norm": 45.074148468155265, "learning_rate": 3.8557055829229486e-07, "loss": 0.235, "step": 5515, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9375, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9071729957805907, "success_rate.epoch.env.math": 0.9631236442516269, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7831513260530422, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8804724980687112, "success_rate.epoch.global": 0.8801955990220048, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9932228915662651, "tokens_p.mean_in_band": 0.70166015625, "tokens_rate.above_band": 0.9120879120879121, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08791208791208792 }, { "epoch": 1.175969322539412, "grad_norm": 0.0, "learning_rate": 3.855442897917042e-07, "loss": 0.3012, "step": 5520, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9634146341463414, "success_rate.epoch.env.logic": 0.9083333333333333, "success_rate.epoch.env.math": 0.9632829373650108, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7841614906832298, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8808413141308725, "success_rate.epoch.global": 0.8809234507897934, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0018025078369905, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9993734335839599, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0006265664160401002 }, { "epoch": 1.1770345121431616, "grad_norm": 193.9761483691228, "learning_rate": 3.8551800267461863e-07, "loss": 0.2372, "step": 5525, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9634146341463414, "success_rate.epoch.env.logic": 0.9087136929460581, "success_rate.epoch.env.math": 0.9636752136752137, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7848297213622291, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8810317541435267, "success_rate.epoch.global": 0.8816425120772947, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974287974683544, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.9875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0125 }, { "epoch": 1.178099701746911, "grad_norm": 184.24616975516568, "learning_rate": 3.8549169696019613e-07, "loss": 0.2773, "step": 5530, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.963855421686747, "success_rate.epoch.env.logic": 0.9094650205761317, "success_rate.epoch.env.math": 0.9637526652452025, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7830769230769231, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8810177135611699, "success_rate.epoch.global": 0.8811524609843937, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975792253521126, "tokens_p.mean_in_band": 0.4, "tokens_rate.above_band": 0.9912739965095986, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008726003490401396 }, { "epoch": 1.1791648913506605, "grad_norm": 158.10076953042807, "learning_rate": 3.854653726676081e-07, "loss": 0.4389, "step": 5535, "success_rate.epoch.env.abd": 0.9875, "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9098360655737705, "success_rate.epoch.env.math": 0.9640591966173362, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7840735068912711, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8801411607181179, "success_rate.epoch.global": 0.8812649164677804, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9885684742647058, "tokens_p.mean_in_band": 0.656, "tokens_rate.above_band": 0.8131539611360239, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18684603886397608 }, { "epoch": 1.18023008095441, "grad_norm": 800.7934423077986, "learning_rate": 3.8543902981603944e-07, "loss": 0.2522, "step": 5540, "success_rate.epoch.env.abd": 0.9875, "success_rate.epoch.env.agentgym:alfworld": 0.9387755102040817, "success_rate.epoch.env.agentgym:sciworld": 0.9782608695652174, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9102040816326531, "success_rate.epoch.env.math": 0.9641350210970464, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7833333333333333, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8801142213527228, "success_rate.epoch.global": 0.8807829181494662, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9991680532445923, "tokens_p.mean_in_band": 0.5614013671875, "tokens_rate.above_band": 0.986863711001642, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013136288998357963 }, { "epoch": 1.1812952705581594, "grad_norm": 207.2933995708041, "learning_rate": 3.8541266842468866e-07, "loss": 0.3123, "step": 5545, "success_rate.epoch.env.abd": 0.9876543209876543, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9791666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9068825910931174, "success_rate.epoch.env.math": 0.9642857142857143, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.783987915407855, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8782749843171479, "success_rate.epoch.global": 0.8803066037735849, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980849582172702, "tokens_p.mean_in_band": 0.4635416666666667, "tokens_rate.above_band": 0.9917127071823204, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008287292817679558 }, { "epoch": 1.1823604601619089, "grad_norm": 65.42479756802668, "learning_rate": 3.8538628851276777e-07, "loss": 0.3117, "step": 5550, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9791666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.907258064516129, "success_rate.epoch.env.math": 0.964509394572025, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7852852852852853, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8784610825578641, "success_rate.epoch.global": 0.8810082063305978, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976308664259927, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.992831541218638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007168458781362007 }, { "epoch": 1.1834256497656583, "grad_norm": 48.38196783799604, "learning_rate": 3.853598900995022e-07, "loss": 0.2193, "step": 5555, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.908, "success_rate.epoch.env.math": 0.9628099173553719, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7859281437125748, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8784711268090036, "success_rate.epoch.global": 0.8811188811188811, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966317365269461, "tokens_p.mean_below_band": 4.3655745685100555e-09, "tokens_p.mean_in_band": 0.751953125, "tokens_rate.above_band": 0.9709302325581395, "tokens_rate.below_band": 0.005813953488372093, "tokens_rate.in_band": 0.023255813953488372 }, { "epoch": 1.1844908393694078, "grad_norm": 84.9673215473575, "learning_rate": 3.85333473204131e-07, "loss": 0.3241, "step": 5560, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9083665338645418, "success_rate.epoch.env.math": 0.9628099173553719, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7863501483679525, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8785932750785818, "success_rate.epoch.global": 0.8812282734646582, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981203007518797, "tokens_p.mean_in_band": 0.541015625, "tokens_rate.above_band": 0.9950124688279302, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004987531172069825 }, { "epoch": 1.1855560289731573, "grad_norm": 181.6094812162179, "learning_rate": 3.8530703784590655e-07, "loss": 0.2354, "step": 5565, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.92, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9051383399209486, "success_rate.epoch.env.math": 0.9631147540983607, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7866666666666666, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8772914630776424, "success_rate.epoch.global": 0.8807603686635944, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961832061068703, "tokens_p.mean_in_band": 0.5447048611111112, "tokens_rate.above_band": 0.9622245540398741, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03777544596012592 }, { "epoch": 1.1866212185769067, "grad_norm": 124.83374529074972, "learning_rate": 3.852805840440948e-07, "loss": 0.3869, "step": 5570, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.905511811023622, "success_rate.epoch.env.math": 0.9633401221995926, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7879234167893961, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776513570040371, "success_rate.epoch.global": 0.8814432989690721, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970201711491442, "tokens_p.mean_in_band": 0.77265625, "tokens_rate.above_band": 0.9761336515513126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02386634844868735 }, { "epoch": 1.1876864081806562, "grad_norm": 144.63310154738687, "learning_rate": 3.8525411181797513e-07, "loss": 0.305, "step": 5575, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9058823529411765, "success_rate.epoch.env.math": 0.9634888438133874, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7865497076023392, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776211770389462, "success_rate.epoch.global": 0.8809116809116809, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.9964539007092199, "tokens_p.mean_in_band": 0.59375, "tokens_rate.above_band": 0.9215686274509803, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0784313725490196 }, { "epoch": 1.1887515977844056, "grad_norm": 101.585484694657, "learning_rate": 3.8522762118684013e-07, "loss": 0.5272, "step": 5580, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9058823529411765, "success_rate.epoch.env.math": 0.9617706237424547, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7855072463768116, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8773702060119952, "success_rate.epoch.global": 0.8798866855524079, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7083333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9847898230088495, "tokens_p.mean_in_band": 0.574187247983871, "tokens_rate.above_band": 0.7847222222222222, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2152777777777778 }, { "epoch": 1.189816787388155, "grad_norm": 164.83537110675357, "learning_rate": 3.852011121699962e-07, "loss": 0.2708, "step": 5585, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9058823529411765, "success_rate.epoch.env.math": 0.962, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7873563218390804, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8775721953380077, "success_rate.epoch.global": 0.8805633802816901, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9904279279279279, "tokens_p.mean_in_band": 0.626953125, "tokens_rate.above_band": 0.9487179487179487, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05128205128205128 }, { "epoch": 1.1908819769919046, "grad_norm": 145.51518788109993, "learning_rate": 3.8517458478676275e-07, "loss": 0.325, "step": 5590, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9066147859922179, "success_rate.epoch.env.math": 0.9623762376237623, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7882689556509299, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777559502913396, "success_rate.epoch.global": 0.8812324929971989, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9902912621359223, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9903846153846154, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009615384615384616 }, { "epoch": 1.191947166595654, "grad_norm": 53.3532888710968, "learning_rate": 3.8514803905647286e-07, "loss": 0.2571, "step": 5595, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9076923076923077, "success_rate.epoch.env.math": 0.9625984251968503, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7894736842105263, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8779836264579557, "success_rate.epoch.global": 0.8818941504178273, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987873134328358, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9970238095238095, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002976190476190476 }, { "epoch": 1.1930123561994035, "grad_norm": 43.17086447186869, "learning_rate": 3.851214749984728e-07, "loss": 0.3857, "step": 5600, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9076923076923077, "success_rate.epoch.env.math": 0.962671905697446, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7893258426966292, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8779768663658373, "success_rate.epoch.global": 0.8814404432132964, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9880952380952381, "tokens_p.mean_in_band": 0.3967013888888889, "tokens_rate.above_band": 0.9210526315789473, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07894736842105263 }, { "epoch": 1.194077545803153, "grad_norm": 135.6613944030098, "learning_rate": 3.850948926321223e-07, "loss": 0.2807, "step": 5605, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9076923076923077, "success_rate.epoch.env.math": 0.9627450980392157, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7910863509749304, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8782710444106478, "success_rate.epoch.global": 0.8820936639118457, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981429303278688, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.1951427354069024, "grad_norm": 52.47215054391258, "learning_rate": 3.850682919767944e-07, "loss": 0.2658, "step": 5610, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9215686274509803, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9080459770114943, "success_rate.epoch.env.math": 0.9631067961165048, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7908587257617729, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8783153846091313, "success_rate.epoch.global": 0.8821917808219178, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944444444444445, "tokens_p.mean_in_band": 0.60546875, "tokens_rate.above_band": 0.967741935483871, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03225806451612903 }, { "epoch": 1.1962079250106519, "grad_norm": 265.44880193654274, "learning_rate": 3.850416730518754e-07, "loss": 0.348, "step": 5615, "success_rate.epoch.env.abd": 0.9882352941176471, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9045801526717557, "success_rate.epoch.env.math": 0.9631782945736435, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7895460797799174, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8780817282702493, "success_rate.epoch.global": 0.8811989100817439, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.997671568627451, "tokens_p.mean_in_band": 0.4725341796875, "tokens_rate.above_band": 0.9695817490494296, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030418250950570342 }, { "epoch": 1.1972731146144013, "grad_norm": 231.76419326673198, "learning_rate": 3.850150358767651e-07, "loss": 0.2946, "step": 5620, "success_rate.epoch.env.abd": 0.9882352941176471, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9049429657794676, "success_rate.epoch.env.math": 0.9634615384615385, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.7906976744186046, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8782794305921384, "success_rate.epoch.global": 0.8818428184281842, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992131294964028, "tokens_p.mean_in_band": 0.77734375, "tokens_rate.above_band": 0.996415770609319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0035842293906810036 }, { "epoch": 1.1983383042181508, "grad_norm": 55.25376279417916, "learning_rate": 3.849883804708764e-07, "loss": 0.1619, "step": 5625, "success_rate.epoch.env.abd": 0.9883720930232558, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9053030303030303, "success_rate.epoch.env.math": 0.9636711281070746, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.791268758526603, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8774400041630631, "success_rate.epoch.global": 0.8819407008086253, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970103790613718, "tokens_p.mean_in_band": 0.6615349264705882, "tokens_rate.above_band": 0.9702276707530648, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0297723292469352 }, { "epoch": 1.1994034938219003, "grad_norm": 527.4931450433074, "learning_rate": 3.849617068536356e-07, "loss": 0.2507, "step": 5630, "success_rate.epoch.env.abd": 0.9883720930232558, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.9053030303030303, "success_rate.epoch.env.math": 0.9638783269961977, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7913279132791328, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8765621876497424, "success_rate.epoch.global": 0.8815013404825738, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9851588628762542, "tokens_p.mean_in_band": 0.734631990131579, "tokens_rate.above_band": 0.887240356083086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11275964391691394 }, { "epoch": 1.2004686834256497, "grad_norm": 3507.9177473413506, "learning_rate": 3.849350150444822e-07, "loss": 0.2921, "step": 5635, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.9060150375939849, "success_rate.epoch.env.math": 0.9640831758034026, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7908232118758435, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8766236808595586, "success_rate.epoch.global": 0.8816, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979338842975206, "tokens_p.mean_in_band": 0.5338541666666666, "tokens_rate.above_band": 0.9758064516129032, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024193548387096774 }, { "epoch": 1.2015338730293992, "grad_norm": 83.35150067453999, "learning_rate": 3.8490830506286897e-07, "loss": 0.4006, "step": 5640, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.9063670411985019, "success_rate.epoch.env.math": 0.9642857142857143, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7908847184986595, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8767114494704156, "success_rate.epoch.global": 0.8816976127320955, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942396313364056, "tokens_p.mean_in_band": 0.658984375, "tokens_rate.above_band": 0.9559471365638766, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04405286343612335 }, { "epoch": 1.2025990626331486, "grad_norm": 28.75466648149286, "learning_rate": 3.8488157692826207e-07, "loss": 0.279, "step": 5645, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9468085106382979, "success_rate.epoch.env.logic": 0.9067164179104478, "success_rate.epoch.env.math": 0.9646182495344506, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7911646586345381, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8768814952823951, "success_rate.epoch.global": 0.8823218997361477, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992774566473989, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9942528735632183, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005747126436781609 }, { "epoch": 1.203664252236898, "grad_norm": 285.68657119909125, "learning_rate": 3.8485483066014075e-07, "loss": 0.2338, "step": 5650, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9468085106382979, "success_rate.epoch.env.logic": 0.9067164179104478, "success_rate.epoch.env.math": 0.9648798521256932, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7928286852589641, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8770565524838194, "success_rate.epoch.global": 0.8829396325459318, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.994338768115942, "tokens_p.mean_in_band": 0.734375, "tokens_rate.above_band": 0.9787234042553191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02127659574468085 }, { "epoch": 1.2047294418406476, "grad_norm": 117.06626280681023, "learning_rate": 3.848280662779974e-07, "loss": 0.461, "step": 5655, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9077490774907749, "success_rate.epoch.env.math": 0.9650735294117647, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7923280423280423, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8771734256065294, "success_rate.epoch.global": 0.8830287206266318, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972222222222222, "tokens_p.mean_in_band": 0.484375, "tokens_rate.above_band": 0.989010989010989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01098901098901099 }, { "epoch": 1.205794631444397, "grad_norm": 291.76164658597423, "learning_rate": 3.8480128380133774e-07, "loss": 0.3348, "step": 5660, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9479166666666666, "success_rate.epoch.env.logic": 0.9080882352941176, "success_rate.epoch.env.math": 0.9652014652014652, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7921052631578948, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8772749924575846, "success_rate.epoch.global": 0.8831168831168831, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998342175066313, "tokens_p.mean_in_band": 0.6028645833333334, "tokens_rate.above_band": 0.9973544973544973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0026455026455026454 }, { "epoch": 1.2068598210481465, "grad_norm": 310.4944857853484, "learning_rate": 3.847744832496807e-07, "loss": 0.3339, "step": 5665, "success_rate.epoch.env.abd": 0.9887640449438202, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9479166666666666, "success_rate.epoch.env.logic": 0.9084249084249084, "success_rate.epoch.env.math": 0.9653916211293261, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7934640522875817, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8774580196844751, "success_rate.epoch.global": 0.8837209302325582, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9904761904761905, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9722222222222222, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027777777777777776 }, { "epoch": 1.207925010651896, "grad_norm": 37.95794628739215, "learning_rate": 3.847476646425583e-07, "loss": 0.3587, "step": 5670, "success_rate.epoch.env.abd": 0.9887640449438202, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9484536082474226, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9655172413793104, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7935064935064935, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.877582656385898, "success_rate.epoch.global": 0.8838046272493574, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949024822695035, "tokens_p.mean_in_band": 0.6636284722222222, "tokens_rate.above_band": 0.94, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06 }, { "epoch": 1.2089902002556454, "grad_norm": 183.66622223791853, "learning_rate": 3.8472082799951577e-07, "loss": 0.2424, "step": 5675, "success_rate.epoch.env.abd": 0.9888888888888889, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.9094202898550725, "success_rate.epoch.env.math": 0.9657039711191335, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7930142302716688, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777759339871174, "success_rate.epoch.global": 0.8838874680306905, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987142218246869, "tokens_p.mean_below_band": 7.729977369308472e-08, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9964349376114082, "tokens_rate.below_band": 0.0017825311942959, "tokens_rate.in_band": 0.0017825311942959 }, { "epoch": 1.210055389859395, "grad_norm": 101.73876273024432, "learning_rate": 3.846939733401114e-07, "loss": 0.2749, "step": 5680, "success_rate.epoch.env.abd": 0.9888888888888889, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9494949494949495, "success_rate.epoch.env.logic": 0.9097472924187726, "success_rate.epoch.env.math": 0.9658273381294964, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.79204107830552, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777752590113107, "success_rate.epoch.global": 0.8834605597964377, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924150485436893, "tokens_p.mean_in_band": 0.5642755681818182, "tokens_rate.above_band": 0.9493087557603687, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05069124423963134 }, { "epoch": 1.2111205794631443, "grad_norm": 78.2027511815762, "learning_rate": 3.846671006839167e-07, "loss": 0.3059, "step": 5685, "success_rate.epoch.env.abd": 0.9888888888888889, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.9100719424460432, "success_rate.epoch.env.math": 0.9659498207885304, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7931034482758621, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8771562611703956, "success_rate.epoch.global": 0.8835443037974684, "success_rate.window.env.ded": 0.6666666666666666, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9923911020104245, "tokens_p.mean_in_band": 0.5719088040865384, "tokens_rate.above_band": 0.865892972275951, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.134107027724049 }, { "epoch": 1.212185769066894, "grad_norm": 93.66850832785966, "learning_rate": 3.846402100505164e-07, "loss": 0.2398, "step": 5690, "success_rate.epoch.env.abd": 0.989010989010989, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.9113475177304965, "success_rate.epoch.env.math": 0.966131907308378, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7933673469387755, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8773523469701533, "success_rate.epoch.global": 0.8841309823677582, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987461419753086, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.998972250770812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0010277492291880781 }, { "epoch": 1.2132509586706433, "grad_norm": 158.98297010179246, "learning_rate": 3.8461330145950797e-07, "loss": 0.2034, "step": 5695, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9245283018867925, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.9113475177304965, "success_rate.epoch.env.math": 0.9662522202486679, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7936708860759494, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.877412362869647, "success_rate.epoch.global": 0.8842105263157894, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951923076923077, "tokens_p.mean_in_band": 0.29194078947368424, "tokens_rate.above_band": 0.8602941176470589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13970588235294118 }, { "epoch": 1.214316148274393, "grad_norm": 85.41108854358225, "learning_rate": 3.845863749305024e-07, "loss": 0.2455, "step": 5700, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.941747572815534, "success_rate.epoch.env.logic": 0.9119718309859155, "success_rate.epoch.env.math": 0.9664310954063604, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7944514501891551, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777353154871697, "success_rate.epoch.global": 0.884788029925187, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975094876660342, "tokens_p.mean_in_band": 0.65, "tokens_rate.above_band": 0.9906015037593985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009398496240601503 }, { "epoch": 1.2153813378781424, "grad_norm": 64.15294846241883, "learning_rate": 3.845594304831234e-07, "loss": 0.4064, "step": 5705, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9423076923076923, "success_rate.epoch.env.logic": 0.9122807017543859, "success_rate.epoch.env.math": 0.9667250437828371, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7952261306532663, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8779114626781894, "success_rate.epoch.global": 0.8853598014888338, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9932650862068966, "tokens_p.mean_in_band": 0.83125, "tokens_rate.above_band": 0.9789029535864979, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02109704641350211 }, { "epoch": 1.2164465274818919, "grad_norm": 67.64612313912163, "learning_rate": 3.8453246813700797e-07, "loss": 0.2585, "step": 5710, "success_rate.epoch.env.abd": 0.9893617021276596, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9125874125874126, "success_rate.epoch.env.math": 0.9668411867364747, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7959949937421777, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8781566555528674, "success_rate.epoch.global": 0.8859259259259259, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987987276664861, "tokens_p.mean_in_band": 0.6490885416666666, "tokens_rate.above_band": 0.9967620075553157, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0032379924446842958 }, { "epoch": 1.2175117170856413, "grad_norm": 183.80215668402516, "learning_rate": 3.8450548791180607e-07, "loss": 0.3292, "step": 5715, "success_rate.epoch.env.abd": 0.9893617021276596, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9097222222222222, "success_rate.epoch.env.math": 0.9670138888888888, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7947761194029851, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8778010771390522, "success_rate.epoch.global": 0.885012285012285, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.7000000000000001, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9961145648312612, "tokens_p.mean_in_band": 0.72015625, "tokens_rate.above_band": 0.9574829931972789, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04251700680272109 }, { "epoch": 1.2185769066893908, "grad_norm": 90.83328489423037, "learning_rate": 3.8447848982718065e-07, "loss": 0.2913, "step": 5720, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9097222222222222, "success_rate.epoch.env.math": 0.9671848013816926, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7938271604938272, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777405258360079, "success_rate.epoch.global": 0.8845965770171149, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9925, "tokens_p.mean_in_band": 0.611328125, "tokens_rate.above_band": 0.946969696969697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05303030303030303 }, { "epoch": 1.2196420962931402, "grad_norm": 45.55175745928387, "learning_rate": 3.8445147390280777e-07, "loss": 0.2472, "step": 5725, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9259259259259259, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9081632653061225, "success_rate.epoch.env.math": 0.9672977624784854, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7943349753694581, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776552366593099, "success_rate.epoch.global": 0.8846715328467153, "success_rate.window.env.logic": 0.8333333333333334, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973540145985401, "tokens_p.mean_in_band": 0.6535326086956522, "tokens_rate.above_band": 0.9675141242937854, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03248587570621469 }, { "epoch": 1.2207072858968897, "grad_norm": 469.69598646410327, "learning_rate": 3.8442444015837643e-07, "loss": 0.4276, "step": 5730, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9675213675213675, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7936117936117936, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.877816588333565, "success_rate.epoch.global": 0.8847457627118644, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991391184573003, "tokens_p.mean_in_band": 0.5263671875, "tokens_rate.above_band": 0.9477806788511749, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05221932114882506 }, { "epoch": 1.2217724755006392, "grad_norm": 60.51911243000029, "learning_rate": 3.843973886135886e-07, "loss": 0.3197, "step": 5735, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9093959731543624, "success_rate.epoch.env.math": 0.966044142614601, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7936507936507936, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777135737149911, "success_rate.epoch.global": 0.8843373493975903, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9926801801801802, "tokens_p.mean_in_band": 0.5534855769230769, "tokens_rate.above_band": 0.8951612903225806, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10483870967741936 }, { "epoch": 1.2228376651043886, "grad_norm": 47.84149722393532, "learning_rate": 3.8437031928815927e-07, "loss": 0.2018, "step": 5740, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9093959731543624, "success_rate.epoch.env.math": 0.9661590524534687, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7941888619854721, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777995014369474, "success_rate.epoch.global": 0.8844124700239808, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954268292682927, "tokens_p.mean_in_band": 0.6428571428571429, "tokens_rate.above_band": 0.9590643274853801, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04093567251461988 }, { "epoch": 1.223902854708138, "grad_norm": 34.12597138938547, "learning_rate": 3.843432322018164e-07, "loss": 0.2656, "step": 5745, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9108910891089109, "success_rate.epoch.env.math": 0.9663299663299664, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7946859903381642, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8779961521809235, "success_rate.epoch.global": 0.8849642004773269, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987113402061856, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.9797979797979798, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020202020202020204 }, { "epoch": 1.2249680443118875, "grad_norm": 83.03138468541954, "learning_rate": 3.843161273743008e-07, "loss": 0.1445, "step": 5750, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9108910891089109, "success_rate.epoch.env.math": 0.966499162479062, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7952095808383234, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8780591327854013, "success_rate.epoch.global": 0.8850356294536817, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915865384615384, "tokens_p.mean_in_band": 0.6643880208333334, "tokens_rate.above_band": 0.896551724137931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10344827586206896 }, { "epoch": 1.226033233915637, "grad_norm": 94.84688360452714, "learning_rate": 3.8428900482536637e-07, "loss": 0.2387, "step": 5755, "success_rate.epoch.env.abd": 0.9896907216494846, "success_rate.epoch.env.agentgym:alfworld": 0.9272727272727272, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9111842105263158, "success_rate.epoch.env.math": 0.9665551839464883, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7966706302021404, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8782434263933655, "success_rate.epoch.global": 0.885579196217494, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957482993197279, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9865771812080537, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013422818791946308 }, { "epoch": 1.2270984235193865, "grad_norm": 220.00251198916067, "learning_rate": 3.8426186457477974e-07, "loss": 0.3906, "step": 5760, "success_rate.epoch.env.abd": 0.9896907216494846, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9114754098360656, "success_rate.epoch.env.math": 0.9666666666666667, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7966903073286052, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8768246013777033, "success_rate.epoch.global": 0.8851764705882353, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973456964892412, "tokens_p.mean_in_band": 0.4713541666666667, "tokens_rate.above_band": 0.9932508436445444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006749156355455568 }, { "epoch": 1.228163613123136, "grad_norm": 95.04780368705549, "learning_rate": 3.842347066423205e-07, "loss": 0.2704, "step": 5765, "success_rate.epoch.env.abd": 0.9897959183673469, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9117647058823529, "success_rate.epoch.env.math": 0.9667221297836939, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7974087161366313, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8770286469557543, "success_rate.epoch.global": 0.8857142857142857, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991099683544303, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.2292288027268854, "grad_norm": 90.63497899350807, "learning_rate": 3.842075310477813e-07, "loss": 0.1603, "step": 5770, "success_rate.epoch.env.abd": 0.9897959183673469, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9123376623376623, "success_rate.epoch.env.math": 0.9667774086378738, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7978971962616822, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8771301665407123, "success_rate.epoch.global": 0.8857808857808858, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9903706395348837, "tokens_p.mean_below_band": 1.8189894035458565e-09, "tokens_rate.above_band": 0.9942196531791907, "tokens_rate.below_band": 0.005780346820809248, "tokens_rate.in_band": 0.0 }, { "epoch": 1.2302939923306349, "grad_norm": 59.86166946203114, "learning_rate": 3.841803378109674e-07, "loss": 0.1948, "step": 5775, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.912621359223301, "success_rate.epoch.env.math": 0.966996699669967, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7986030267753201, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8772494292646226, "success_rate.epoch.global": 0.8863109048723898, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980603448275862, "tokens_p.mean_in_band": 0.8763020833333334, "tokens_rate.above_band": 0.9948542024013722, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005145797598627788 }, { "epoch": 1.2313591819343843, "grad_norm": 43.65369572163305, "learning_rate": 3.8415312695169707e-07, "loss": 0.2164, "step": 5780, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.9131832797427653, "success_rate.epoch.env.math": 0.966996699669967, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7976878612716763, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.877264517066023, "success_rate.epoch.global": 0.8859122401847576, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972972972972973, "tokens_p.mean_in_band": 0.48758370535714285, "tokens_rate.above_band": 0.9814323607427056, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01856763925729443 }, { "epoch": 1.2324243715381338, "grad_norm": 53.00415811024405, "learning_rate": 3.8412589848980134e-07, "loss": 0.2911, "step": 5785, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.984375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9454545454545454, "success_rate.epoch.env.logic": 0.9140127388535032, "success_rate.epoch.env.math": 0.9670510708401977, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7983870967741935, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8775228063993507, "success_rate.epoch.global": 0.8864367816091954, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994791666666667, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.996309963099631, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0036900369003690036 }, { "epoch": 1.2334895611418832, "grad_norm": 87.58181142258837, "learning_rate": 3.840986524451242e-07, "loss": 0.1401, "step": 5790, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9140127388535032, "success_rate.epoch.env.math": 0.967266775777414, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7993119266055045, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776930172971064, "success_rate.epoch.global": 0.8869565217391304, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.993439226519337, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9917808219178083, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00821917808219178 }, { "epoch": 1.2345547507456327, "grad_norm": 124.23957177387686, "learning_rate": 3.8407138883752233e-07, "loss": 0.2464, "step": 5795, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9150943396226415, "success_rate.epoch.env.math": 0.9674267100977199, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7988571428571428, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777645401462958, "success_rate.epoch.global": 0.8870159453302962, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99453125, "tokens_p.mean_in_band": 0.4817708333333333, "tokens_rate.above_band": 0.963855421686747, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03614457831325301 }, { "epoch": 1.2356199403493822, "grad_norm": 356.60832033058944, "learning_rate": 3.840441076868653e-07, "loss": 0.2465, "step": 5800, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9122807017543859, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.915625, "success_rate.epoch.env.math": 0.9676375404530745, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7995444191343963, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8780368295144836, "success_rate.epoch.global": 0.8875283446712018, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981387147335423, "tokens_p.mean_in_band": 0.71435546875, "tokens_rate.above_band": 0.9755351681957186, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024464831804281346 }, { "epoch": 1.2366851299531316, "grad_norm": 113.86725879165407, "learning_rate": 3.8401680901303535e-07, "loss": 0.3848, "step": 5805, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9122807017543859, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.9164086687306502, "success_rate.epoch.env.math": 0.967741935483871, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7981859410430839, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8780379396192799, "success_rate.epoch.global": 0.8871331828442438, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963192840646651, "tokens_p.mean_in_band": 0.607421875, "tokens_rate.above_band": 0.9643652561247216, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035634743875278395 }, { "epoch": 1.237750319556881, "grad_norm": 95.98116374327142, "learning_rate": 3.8398949283592755e-07, "loss": 0.3242, "step": 5810, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9385964912280702, "success_rate.epoch.env.logic": 0.9164086687306502, "success_rate.epoch.env.math": 0.9678456591639871, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7993235625704622, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8775762729557237, "success_rate.epoch.global": 0.887191011235955, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967349531005733, "tokens_p.mean_in_band": 0.5277162532216495, "tokens_rate.above_band": 0.9081874112636062, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09181258873639375 }, { "epoch": 1.2388155091606305, "grad_norm": 150.36430497209574, "learning_rate": 3.839621591754498e-07, "loss": 0.2732, "step": 5815, "success_rate.epoch.env.abd": 0.9900990099009901, "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.916923076923077, "success_rate.epoch.env.math": 0.967948717948718, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7986501687289089, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776591035852913, "success_rate.epoch.global": 0.887248322147651, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988517060367454, "tokens_p.mean_in_band": 0.6783854166666666, "tokens_rate.above_band": 0.9844961240310077, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015503875968992248 }, { "epoch": 1.23988069876438, "grad_norm": 58.441257313308306, "learning_rate": 3.8393480805152263e-07, "loss": 0.2066, "step": 5820, "success_rate.epoch.env.abd": 0.9901960784313726, "success_rate.epoch.env.agentgym:alfworld": 0.9137931034482759, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9174311926605505, "success_rate.epoch.env.math": 0.968, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.799552071668533, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8769349726521053, "success_rate.epoch.global": 0.887305122494432, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9919871794871795, "tokens_p.mean_in_band": 0.6583059210526315, "tokens_rate.above_band": 0.8914285714285715, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10857142857142857 }, { "epoch": 1.2409458883681295, "grad_norm": 74.37114795816692, "learning_rate": 3.8390743948407936e-07, "loss": 0.3807, "step": 5825, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9152542372881356, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9179331306990881, "success_rate.epoch.env.math": 0.9682539682539683, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7997762863534675, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8771655579722736, "success_rate.epoch.global": 0.8878048780487805, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968011811023622, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.9806949806949807, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019305019305019305 }, { "epoch": 1.242011077971879, "grad_norm": 61.00403373274862, "learning_rate": 3.83880053493066e-07, "loss": 0.3101, "step": 5830, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9152542372881356, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9179331306990881, "success_rate.epoch.env.math": 0.9684542586750788, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7986651835372637, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8725373022999922, "success_rate.epoch.global": 0.8869757174392936, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9961986571879937, "tokens_p.mean_in_band": 0.6516335227272727, "tokens_rate.above_band": 0.9504504504504504, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04954954954954955 }, { "epoch": 1.2430762675756284, "grad_norm": 306.54720973075433, "learning_rate": 3.8385265009844123e-07, "loss": 0.287, "step": 5835, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9181818181818182, "success_rate.epoch.env.math": 0.9686520376175548, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.7984496124031008, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8726866954517718, "success_rate.epoch.global": 0.8870329670329671, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950527704485488, "tokens_p.mean_in_band": 0.60703125, "tokens_rate.above_band": 0.974293059125964, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02570694087403599 }, { "epoch": 1.2441414571793779, "grad_norm": 123.22668811418404, "learning_rate": 3.838252293201765e-07, "loss": 0.2486, "step": 5840, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9180327868852459, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9396551724137931, "success_rate.epoch.env.logic": 0.918429003021148, "success_rate.epoch.env.math": 0.9688473520249221, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7977900552486188, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720812833252851, "success_rate.epoch.global": 0.886652078774617, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972741433021807, "tokens_p.mean_in_band": 0.6458333333333334, "tokens_rate.above_band": 0.9756838905775076, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0243161094224924 }, { "epoch": 1.2452066467831273, "grad_norm": 197.33373961995605, "learning_rate": 3.837977911782558e-07, "loss": 0.4533, "step": 5845, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9193548387096774, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9396551724137931, "success_rate.epoch.env.logic": 0.9159159159159159, "success_rate.epoch.env.math": 0.9689922480620154, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7964796479647965, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718670518227825, "success_rate.epoch.global": 0.885838779956427, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971203071672355, "tokens_p.mean_in_band": 0.5521599264705882, "tokens_rate.above_band": 0.8960244648318043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10397553516819572 }, { "epoch": 1.2462718363868768, "grad_norm": 216.9435162596948, "learning_rate": 3.8377033569267596e-07, "loss": 0.287, "step": 5850, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9401709401709402, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9675925925925926, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7969264544456641, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720119383177447, "success_rate.epoch.global": 0.8859002169197397, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975511073253833, "tokens_p.mean_in_band": 0.029296875, "tokens_rate.above_band": 0.9982993197278912, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0017006802721088435 }, { "epoch": 1.2473370259906262, "grad_norm": 137.2533340946094, "learning_rate": 3.837428628834463e-07, "loss": 0.298, "step": 5855, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.940677966101695, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9677914110429447, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7978142076502732, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714883616276761, "success_rate.epoch.global": 0.8859611231101512, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998807251908397, "tokens_p.mean_in_band": 0.6638569078947368, "tokens_rate.above_band": 0.9323843416370107, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06761565836298933 }, { "epoch": 1.2484022155943757, "grad_norm": 59.92096970708178, "learning_rate": 3.837153727705888e-07, "loss": 0.2479, "step": 5860, "success_rate.epoch.env.abd": 0.9903846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.940677966101695, "success_rate.epoch.env.logic": 0.9144542772861357, "success_rate.epoch.env.math": 0.9679878048780488, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7982551799345693, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713536643538181, "success_rate.epoch.global": 0.886021505376344, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989390432098766, "tokens_p.mean_in_band": 0.5291666666666667, "tokens_rate.above_band": 0.9773755656108597, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02262443438914027 }, { "epoch": 1.2494674051981254, "grad_norm": 136.98942295448293, "learning_rate": 3.83687865374138e-07, "loss": 0.485, "step": 5865, "success_rate.epoch.env.abd": 0.9903846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9125364431486881, "success_rate.epoch.env.math": 0.9681335356600911, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7976060935799782, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704149328476368, "success_rate.epoch.global": 0.8852248394004283, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.985437925170068, "tokens_p.mean_below_band": 3.948807716369629e-07, "tokens_p.mean_in_band": 0.4875812861271676, "tokens_rate.above_band": 0.6288770053475936, "tokens_rate.below_band": 0.0010695187165775401, "tokens_rate.in_band": 0.3700534759358289 }, { "epoch": 1.2505325948018746, "grad_norm": 222.50650366190857, "learning_rate": 3.8366034071414115e-07, "loss": 0.3273, "step": 5870, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9130434782608695, "success_rate.epoch.env.math": 0.9683734939759037, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7980456026058632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705311217154055, "success_rate.epoch.global": 0.8857142857142857, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949383802816901, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.993006993006993, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006993006993006993 }, { "epoch": 1.2515977844056243, "grad_norm": 154.859493260385, "learning_rate": 3.83632798810658e-07, "loss": 0.3507, "step": 5875, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9135446685878963, "success_rate.epoch.env.math": 0.968421052631579, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7982740021574973, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8706017715821633, "success_rate.epoch.global": 0.8857264231096007, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9883241758241759, "tokens_p.mean_in_band": 0.5513392857142857, "tokens_rate.above_band": 0.9285714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07142857142857142 }, { "epoch": 1.2526629740093735, "grad_norm": 108.6232329111237, "learning_rate": 3.8360523968376096e-07, "loss": 0.5375, "step": 5880, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.9206349206349206, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9140401146131805, "success_rate.epoch.env.math": 0.9670658682634731, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7982832618025751, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705244553369137, "success_rate.epoch.global": 0.8853637901861252, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8222222222222223, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9898648648648649, "tokens_p.mean_in_band": 0.5659877232142857, "tokens_rate.above_band": 0.8409090909090909, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1590909090909091 }, { "epoch": 1.2537281636131232, "grad_norm": 61.52741589906454, "learning_rate": 3.8357766335353487e-07, "loss": 0.0921, "step": 5885, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.921875, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9150141643059491, "success_rate.epoch.env.math": 0.9671641791044776, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7984994640943194, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708134292950857, "success_rate.epoch.global": 0.8858466722830666, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998061560150376, "tokens_p.mean_in_band": 0.884765625, "tokens_rate.above_band": 0.9962546816479401, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003745318352059925 }, { "epoch": 1.2547933532168725, "grad_norm": 88.37184818480591, "learning_rate": 3.835500698400771e-07, "loss": 0.3708, "step": 5890, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.9850746268656716, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9124293785310734, "success_rate.epoch.env.math": 0.9673105497771174, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7993596584845251, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707997787843657, "success_rate.epoch.global": 0.8859060402684564, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978837471783296, "tokens_p.mean_in_band": 0.630859375, "tokens_rate.above_band": 0.9866369710467706, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013363028953229399 }, { "epoch": 1.2558585428206221, "grad_norm": 31.21583025921499, "learning_rate": 3.8352245916349775e-07, "loss": 0.2606, "step": 5895, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9242424242424242, "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9101123595505618, "success_rate.epoch.env.math": 0.9674074074074074, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708014424972652, "success_rate.epoch.global": 0.8859649122807017, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994292237442922, "tokens_p.mean_in_band": 0.6861049107142857, "tokens_rate.above_band": 0.9690265486725663, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030973451327433628 }, { "epoch": 1.2569237324243716, "grad_norm": 215.8912277725402, "learning_rate": 3.834948313439191e-07, "loss": 0.139, "step": 5900, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9253731343283582, "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9103641456582633, "success_rate.epoch.env.math": 0.9676945668135095, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8004246284501062, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870991831956342, "success_rate.epoch.global": 0.8864392678868552, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982531055900621, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9969040247678018, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0030959752321981426 }, { "epoch": 1.257988922028121, "grad_norm": 31.410651242501206, "learning_rate": 3.834671864014763e-07, "loss": 0.2177, "step": 5905, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.9855072463768116, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9108635097493036, "success_rate.epoch.env.math": 0.9678832116788321, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.801058201058201, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.871211744271923, "success_rate.epoch.global": 0.8869096934548467, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986482188295165, "tokens_p.mean_in_band": 0.677734375, "tokens_rate.above_band": 0.9703703703703703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02962962962962963 }, { "epoch": 1.2590541116318705, "grad_norm": 134.01227005014402, "learning_rate": 3.834395243563166e-07, "loss": 0.3778, "step": 5910, "success_rate.epoch.env.abd": 0.9906542056074766, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9113573407202216, "success_rate.epoch.env.math": 0.9680232558139535, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8016877637130801, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700547379704964, "success_rate.epoch.global": 0.886963696369637, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994119623655914, "tokens_p.mean_in_band": 0.7034505208333334, "tokens_rate.above_band": 0.96875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03125 }, { "epoch": 1.26011930123562, "grad_norm": 129.79729021954998, "learning_rate": 3.8341184522860004e-07, "loss": 0.2074, "step": 5915, "success_rate.epoch.env.abd": 0.9907407407407407, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9113573407202216, "success_rate.epoch.env.math": 0.968299711815562, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8021052631578948, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701622747947336, "success_rate.epoch.global": 0.8874281018898932, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966755319148937, "tokens_p.mean_in_band": 0.8271484375, "tokens_rate.above_band": 0.9724137931034482, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027586206896551724 }, { "epoch": 1.2611844908393695, "grad_norm": 308.50683494089435, "learning_rate": 3.833841490384989e-07, "loss": 0.3909, "step": 5920, "success_rate.epoch.env.abd": 0.990909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9116022099447514, "success_rate.epoch.env.math": 0.9684361549497847, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8006295907660022, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700780920797526, "success_rate.epoch.global": 0.8870703764320785, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666668, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982798165137615, "tokens_p.mean_in_band": 0.3977272727272727, "tokens_rate.above_band": 0.9519650655021834, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.048034934497816595 }, { "epoch": 1.262249680443119, "grad_norm": 274.2756468289808, "learning_rate": 3.83356435806198e-07, "loss": 0.2329, "step": 5925, "success_rate.epoch.env.abd": 0.9910714285714286, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9338842975206612, "success_rate.epoch.env.logic": 0.9118457300275482, "success_rate.epoch.env.math": 0.9685264663805436, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.801255230125523, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8702657292642499, "success_rate.epoch.global": 0.8875305623471883, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970211330935251, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.2633148700468684, "grad_norm": 76.7892158429907, "learning_rate": 3.833287055518946e-07, "loss": 0.1849, "step": 5930, "success_rate.epoch.env.abd": 0.9910714285714286, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9354838709677419, "success_rate.epoch.env.logic": 0.9120879120879121, "success_rate.epoch.env.math": 0.9686162624821684, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8010416666666667, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704219100051787, "success_rate.epoch.global": 0.8875811688311688, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979016786570744, "tokens_p.mean_in_band": 0.5078125, "tokens_rate.above_band": 0.9940405244338498, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0059594755661501785 }, { "epoch": 1.2643800596506178, "grad_norm": 52.75959508958796, "learning_rate": 3.8330095829579807e-07, "loss": 0.2147, "step": 5935, "success_rate.epoch.env.abd": 0.9912280701754386, "success_rate.epoch.env.agentgym:alfworld": 0.9264705882352942, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9365079365079365, "success_rate.epoch.env.logic": 0.9123287671232877, "success_rate.epoch.env.math": 0.968705547652916, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8006230529595015, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705212039727388, "success_rate.epoch.global": 0.8876313662085691, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973536450662739, "tokens_p.mean_in_band": 0.650390625, "tokens_rate.above_band": 0.9956011730205279, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004398826979472141 }, { "epoch": 1.2654452492543673, "grad_norm": 83.68690561754137, "learning_rate": 3.832731940581307e-07, "loss": 0.4615, "step": 5940, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9365079365079365, "success_rate.epoch.env.logic": 0.9128065395095368, "success_rate.epoch.env.math": 0.9688385269121813, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8012422360248447, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707368272463996, "success_rate.epoch.global": 0.8880837359098228, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975453172205438, "tokens_p.mean_in_band": 0.7890625, "tokens_rate.above_band": 0.993993993993994, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006006006006006006 }, { "epoch": 1.2665104388581168, "grad_norm": 93.4659904314759, "learning_rate": 3.8324541285912675e-07, "loss": 0.2463, "step": 5945, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9726027397260274, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9132791327913279, "success_rate.epoch.env.math": 0.9689703808180536, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8018575851393189, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8709277591840134, "success_rate.epoch.global": 0.888532477947073, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995, "tokens_p.mean_in_band": 0.8606770833333334, "tokens_rate.above_band": 0.9786476868327402, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021352313167259787 }, { "epoch": 1.2675756284618662, "grad_norm": 59.59307126097623, "learning_rate": 3.832176147190329e-07, "loss": 0.35, "step": 5950, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9112903225806451, "success_rate.epoch.env.math": 0.9691011235955056, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8012358393408857, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870768739218163, "success_rate.epoch.global": 0.8881789137380192, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967900302114804, "tokens_p.mean_in_band": 0.7013888888888888, "tokens_rate.above_band": 0.9484240687679083, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05157593123209169 }, { "epoch": 1.2686408180656157, "grad_norm": 132.65850479112723, "learning_rate": 3.8318979965810833e-07, "loss": 0.411, "step": 5955, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9093333333333333, "success_rate.epoch.env.math": 0.969187675070028, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7991803278688525, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704118347413609, "success_rate.epoch.global": 0.8870326173428799, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.6888888888888888, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9926136363636363, "tokens_p.mean_in_band": 0.5417668269230769, "tokens_rate.above_band": 0.8638743455497382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13612565445026178 }, { "epoch": 1.2697060076693651, "grad_norm": 58.71334465707662, "learning_rate": 3.831619676966244e-07, "loss": 0.1775, "step": 5960, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9095744680851063, "success_rate.epoch.env.math": 0.9693165969316597, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.799184505606524, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704777539726387, "success_rate.epoch.global": 0.8870839936608558, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977889150943396, "tokens_p.mean_in_band": 0.6994791666666667, "tokens_rate.above_band": 0.933920704845815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06607929515418502 }, { "epoch": 1.2707711972731146, "grad_norm": 60.509237642902214, "learning_rate": 3.8313411885486485e-07, "loss": 0.3625, "step": 5965, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.91005291005291, "success_rate.epoch.env.math": 0.9693165969316597, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7995951417004049, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705653940254049, "success_rate.epoch.global": 0.8871349644830308, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929435483870968, "tokens_p.mean_in_band": 0.6925381747159091, "tokens_rate.above_band": 0.9441624365482234, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05583756345177665 }, { "epoch": 1.271836386876864, "grad_norm": 143.60654046455994, "learning_rate": 3.831062531531257e-07, "loss": 0.2114, "step": 5970, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.91005291005291, "success_rate.epoch.env.math": 0.9694868238557559, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8002018163471241, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8695766011678788, "success_rate.epoch.global": 0.887185534591195, "success_rate.window.env.agentgym:sciworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998780137772675, "tokens_p.mean_in_band": 0.744140625, "tokens_rate.above_band": 0.9954285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004571428571428572 }, { "epoch": 1.2729015764806135, "grad_norm": 342.7154281216755, "learning_rate": 3.8307837061171537e-07, "loss": 0.3818, "step": 5975, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.927536231884058, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9102902374670184, "success_rate.epoch.env.math": 0.9697386519944979, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8004032258064516, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696834249075341, "success_rate.epoch.global": 0.8876272513703993, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998524678111588, "tokens_p.mean_in_band": 0.69921875, "tokens_rate.above_band": 0.9831223628691983, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016877637130801686 }, { "epoch": 1.273966766084363, "grad_norm": 144.83687020026025, "learning_rate": 3.8305047125095436e-07, "loss": 0.2534, "step": 5980, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9105263157894737, "success_rate.epoch.env.math": 0.9698216735253772, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7995991983967936, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8684347480752916, "success_rate.epoch.global": 0.8868954758190327, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969642857142857, "tokens_p.mean_in_band": 0.49267578125, "tokens_rate.above_band": 0.9162303664921466, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08376963350785341 }, { "epoch": 1.2750319556881125, "grad_norm": 268.3398171037985, "learning_rate": 3.8302255509117553e-07, "loss": 0.3505, "step": 5985, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9105263157894737, "success_rate.epoch.env.math": 0.9699453551912568, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.799800796812749, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8685085785297525, "success_rate.epoch.global": 0.8869463869463869, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952083333333334, "tokens_p.mean_in_band": 0.7217881944444444, "tokens_rate.above_band": 0.9433962264150944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05660377358490566 }, { "epoch": 1.276097145291862, "grad_norm": 156.89110435172327, "learning_rate": 3.8299462215272396e-07, "loss": 0.2568, "step": 5990, "success_rate.epoch.env.abd": 0.9915254237288136, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9105263157894737, "success_rate.epoch.env.math": 0.9700680272108844, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.798810703666997, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8684430050986491, "success_rate.epoch.global": 0.8866099071207431, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9893092105263158, "tokens_p.mean_in_band": 0.6774553571428571, "tokens_rate.above_band": 0.890625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.109375 }, { "epoch": 1.2771623348956114, "grad_norm": 125.48521639557713, "learning_rate": 3.829666724559571e-07, "loss": 0.2436, "step": 5995, "success_rate.epoch.env.abd": 0.9915966386554622, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9114583333333334, "success_rate.epoch.env.math": 0.9701897018970189, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.798219584569733, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8684915312858653, "success_rate.epoch.global": 0.8866615265998458, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959375, "tokens_p.mean_in_band": 0.458984375, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 1.2782275244993608, "grad_norm": 77.57346437988627, "learning_rate": 3.829387060212443e-07, "loss": 0.2269, "step": 6000, "success_rate.epoch.env.abd": 0.9916666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9119170984455959, "success_rate.epoch.env.math": 0.9702702702702702, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7970443349753694, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8684832399628342, "success_rate.epoch.global": 0.8863287250384024, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945733532934131, "tokens_p.mean_in_band": 0.458984375, "tokens_rate.above_band": 0.943502824858757, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05649717514124294 }, { "epoch": 1.2792927141031103, "grad_norm": 468.7125080289853, "learning_rate": 3.8291072286896745e-07, "loss": 0.4377, "step": 6005, "success_rate.epoch.env.abd": 0.9834710743801653, "success_rate.epoch.env.agentgym:alfworld": 0.9142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9095607235142119, "success_rate.epoch.env.math": 0.9703903095558546, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7970588235294118, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675362000193563, "success_rate.epoch.global": 0.8856159143075746, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9833776595744681, "tokens_p.mean_in_band": 0.73687744140625, "tokens_rate.above_band": 0.7859531772575251, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2140468227424749 }, { "epoch": 1.2803579037068598, "grad_norm": 56.85665004058044, "learning_rate": 3.828827230195204e-07, "loss": 0.2405, "step": 6010, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.910025706940874, "success_rate.epoch.env.math": 0.9704301075268817, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7976539589442815, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86790832679462, "success_rate.epoch.global": 0.8860518292682927, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984789823008849, "tokens_p.mean_in_band": 0.7877604166666666, "tokens_rate.above_band": 0.9966923925027563, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0033076074972436605 }, { "epoch": 1.2814230933106092, "grad_norm": 37.589770371380546, "learning_rate": 3.828547064933092e-07, "loss": 0.3531, "step": 6015, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.9076923076923077, "success_rate.epoch.env.math": 0.9705488621151271, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7974683544303798, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677022386898112, "success_rate.epoch.global": 0.8857251328777525, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979104754829123, "tokens_p.mean_in_band": 0.46337890625, "tokens_rate.above_band": 0.9600570613409415, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039942938659058486 }, { "epoch": 1.2824882829143587, "grad_norm": 109.51942541623957, "learning_rate": 3.8282667331075224e-07, "loss": 0.4141, "step": 6020, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.9053708439897699, "success_rate.epoch.env.math": 0.9705882352941176, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7978723404255319, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675315019145025, "success_rate.epoch.global": 0.8854009077155824, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964717741935484, "tokens_p.mean_in_band": 0.6153067129629629, "tokens_rate.above_band": 0.9323308270676691, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06766917293233082 }, { "epoch": 1.2835534725181081, "grad_norm": 218.2487337213087, "learning_rate": 3.8279862349227977e-07, "loss": 0.2619, "step": 6025, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.9030612244897959, "success_rate.epoch.env.math": 0.9707446808510638, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7965284474445516, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86729835046072, "success_rate.epoch.global": 0.8847023360964582, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9984182098765432, "tokens_p.mean_in_band": 0.4713792067307692, "tokens_rate.above_band": 0.9540636042402827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045936395759717315 }, { "epoch": 1.2846186621218578, "grad_norm": 17.019269052050042, "learning_rate": 3.8277055705833435e-07, "loss": 0.2387, "step": 6030, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.9033078880407125, "success_rate.epoch.env.math": 0.9709762532981531, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7971153846153847, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673951843851601, "success_rate.epoch.global": 0.8851351351351351, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.993421052631579, "tokens_p.mean_in_band": 0.7991071428571429, "tokens_rate.above_band": 0.9313725490196079, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06862745098039216 }, { "epoch": 1.285683851725607, "grad_norm": 83.17044845449945, "learning_rate": 3.827424740293705e-07, "loss": 0.2619, "step": 6035, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9037974683544304, "success_rate.epoch.env.math": 0.9710144927536232, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7978927203065134, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8665787913477563, "success_rate.epoch.global": 0.8851907255048617, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989583333333333, "tokens_p.mean_below_band": 2.066371962428093e-09, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.9911054637865311, "tokens_rate.below_band": 0.0012706480304955528, "tokens_rate.in_band": 0.007623888182973317 }, { "epoch": 1.2867490413293567, "grad_norm": 69.87515097013174, "learning_rate": 3.827143744258551e-07, "loss": 0.2037, "step": 6040, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9045226130653267, "success_rate.epoch.env.math": 0.9710906701708278, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7984718242597899, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669326557846556, "success_rate.epoch.global": 0.8856184798807749, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0008765778401123, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.287814230933106, "grad_norm": 85.22130404435951, "learning_rate": 3.8268625826826685e-07, "loss": 0.2027, "step": 6045, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9025, "success_rate.epoch.env.math": 0.9712041884816754, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7994296577946768, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8668461774919652, "success_rate.epoch.global": 0.8856718634001485, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946415706051873, "tokens_p.mean_in_band": 0.45108695652173914, "tokens_rate.above_band": 0.9679218967921897, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03207810320781032 }, { "epoch": 1.2888794205368557, "grad_norm": 32.42391816339672, "learning_rate": 3.8265812557709656e-07, "loss": 0.2222, "step": 6050, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9029850746268657, "success_rate.epoch.env.math": 0.9713541666666666, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7992424242424242, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8668868883337475, "success_rate.epoch.global": 0.8857248520710059, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938271604938271, "tokens_p.mean_in_band": 0.5559895833333334, "tokens_rate.above_band": 0.9642857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03571428571428571 }, { "epoch": 1.289944610140605, "grad_norm": 41.161602536444484, "learning_rate": 3.8262997637284717e-07, "loss": 0.2037, "step": 6055, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9034653465346535, "success_rate.epoch.env.math": 0.9714656290531777, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.7992459943449576, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669410069152778, "success_rate.epoch.global": 0.8857774502579219, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952830188679245, "tokens_p.mean_in_band": 0.4703125, "tokens_rate.above_band": 0.8412698412698413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15873015873015872 }, { "epoch": 1.2910097997443546, "grad_norm": 57.31473458498728, "learning_rate": 3.826018106760336e-07, "loss": 0.433, "step": 6060, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9034653465346535, "success_rate.epoch.env.math": 0.9715394566623544, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8007483629560337, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8670842974807552, "success_rate.epoch.global": 0.8861967694566814, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917763157894737, "tokens_p.mean_in_band": 0.7916666666666666, "tokens_rate.above_band": 0.9568345323741008, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04316546762589928 }, { "epoch": 1.2920749893481038, "grad_norm": 533.8718493512891, "learning_rate": 3.825736285071829e-07, "loss": 0.2421, "step": 6065, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9037037037037037, "success_rate.epoch.env.math": 0.9716129032258064, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8011152416356877, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671459958818605, "success_rate.epoch.global": 0.8862472567666423, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9889705882352942, "tokens_p.mean_below_band": 6.845220923423767e-08, "tokens_p.mean_in_band": 0.5380859375, "tokens_rate.above_band": 0.9714285714285714, "tokens_rate.below_band": 0.009523809523809525, "tokens_rate.in_band": 0.01904761904761905 }, { "epoch": 1.2931401789518535, "grad_norm": 68.82067571028459, "learning_rate": 3.8254542988683395e-07, "loss": 0.1398, "step": 6070, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9041769041769042, "success_rate.epoch.env.math": 0.9717223650385605, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8018518518518518, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867320677767917, "success_rate.epoch.global": 0.8866618075801749, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955645161290323, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.96875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03125 }, { "epoch": 1.294205368555603, "grad_norm": 364.4614280474005, "learning_rate": 3.8251721483553767e-07, "loss": 0.4229, "step": 6075, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9046454767726161, "success_rate.epoch.env.math": 0.971830985915493, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8025830258302583, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8674513504687457, "success_rate.epoch.global": 0.8870733478576616, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957386363636364, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9734513274336283, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02654867256637168 }, { "epoch": 1.2952705581593524, "grad_norm": 64.39942196648772, "learning_rate": 3.8248898337385705e-07, "loss": 0.2027, "step": 6080, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9048780487804878, "success_rate.epoch.env.math": 0.9720101781170484, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8033088235294118, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675547651877076, "success_rate.epoch.global": 0.8874819102749638, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9920343137254902, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.9807692307692307, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019230769230769232 }, { "epoch": 1.296335747763102, "grad_norm": 398.46200063237325, "learning_rate": 3.82460735522367e-07, "loss": 0.2841, "step": 6085, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9178082191780822, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9402985074626866, "success_rate.epoch.env.logic": 0.9051094890510949, "success_rate.epoch.env.math": 0.9720812182741116, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8032936870997255, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676751237441678, "success_rate.epoch.global": 0.8875270367700072, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0005232558139534, "tokens_p.mean_in_band": 0.591796875, "tokens_rate.above_band": 0.9962928637627433, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0037071362372567192 }, { "epoch": 1.2974009373668514, "grad_norm": 134.76241440601828, "learning_rate": 3.824324713016543e-07, "loss": 0.4547, "step": 6090, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9054054054054054, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.9051094890510949, "success_rate.epoch.env.math": 0.9720812182741116, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8036363636363636, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8666585624613958, "success_rate.epoch.global": 0.8872126436781609, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9928872497365648, "tokens_p.mean_in_band": 0.7869001116071429, "tokens_rate.above_band": 0.9713408393039918, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028659160696008188 }, { "epoch": 1.2984661269706008, "grad_norm": 58.32248069672788, "learning_rate": 3.824041907323177e-07, "loss": 0.2071, "step": 6095, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9054054054054054, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.9051094890510949, "success_rate.epoch.env.math": 0.9709962168978562, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.802536231884058, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8664989475249651, "success_rate.epoch.global": 0.8865425912670007, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9972141472868217, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9681050656660413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03189493433395872 }, { "epoch": 1.2995313165743503, "grad_norm": 210.08980179889673, "learning_rate": 3.8237589383496785e-07, "loss": 0.3818, "step": 6100, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.9054054054054054, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.9055690072639225, "success_rate.epoch.env.math": 0.9711055276381909, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8025247971145176, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8665496197234762, "success_rate.epoch.global": 0.8865905848787446, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9907407407407407, "tokens_p.mean_in_band": 0.6702008928571429, "tokens_rate.above_band": 0.9204545454545454, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07954545454545454 }, { "epoch": 1.3005965061780997, "grad_norm": 213.66737535411383, "learning_rate": 3.823475806302274e-07, "loss": 0.1966, "step": 6105, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.9055690072639225, "success_rate.epoch.env.math": 0.9712140175219024, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8030575539568345, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669308919306377, "success_rate.epoch.global": 0.8869936034115139, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988727454909819, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.3016616957818492, "grad_norm": 67.36991174330433, "learning_rate": 3.823192511387308e-07, "loss": 0.2368, "step": 6110, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9518072289156626, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9420289855072463, "success_rate.epoch.env.logic": 0.9060240963855422, "success_rate.epoch.env.math": 0.9712140175219024, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8032200357781754, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8670368647861924, "success_rate.epoch.global": 0.8870396600566572, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953286082474226, "tokens_p.mean_in_band": 0.6263020833333334, "tokens_rate.above_band": 0.97, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03 }, { "epoch": 1.3027268853855987, "grad_norm": 139.07522932763922, "learning_rate": 3.8229090538112435e-07, "loss": 0.2915, "step": 6115, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9420289855072463, "success_rate.epoch.env.logic": 0.9064748201438849, "success_rate.epoch.env.math": 0.9713574097135741, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8035714285714286, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8672259062678461, "success_rate.epoch.global": 0.8874382498235709, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970067049808429, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9961832061068703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003816793893129771 }, { "epoch": 1.3037920749893481, "grad_norm": 10.219117398705716, "learning_rate": 3.822625433780662e-07, "loss": 0.1552, "step": 6120, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.9069212410501193, "success_rate.epoch.env.math": 0.9714640198511166, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8040961709706145, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8674115450609026, "success_rate.epoch.global": 0.8878340365682138, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985687022900763, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.3048572645930976, "grad_norm": 376.2719700624019, "learning_rate": 3.822341651502265e-07, "loss": 0.2478, "step": 6125, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.9073634204275535, "success_rate.epoch.env.math": 0.971604938271605, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8046181172291297, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867523188292948, "success_rate.epoch.global": 0.8882270497547302, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970238095238095, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9932432432432432, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006756756756756757 }, { "epoch": 1.305922454196847, "grad_norm": 166.41085091691906, "learning_rate": 3.8220577071828694e-07, "loss": 0.2998, "step": 6130, "success_rate.epoch.env.abd": 0.9844961240310077, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.9075829383886256, "success_rate.epoch.env.math": 0.9716748768472906, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8054818744473917, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676876386746528, "success_rate.epoch.global": 0.8886173184357542, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967905405405405, "tokens_p.mean_in_band": 0.74658203125, "tokens_rate.above_band": 0.9585492227979274, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04145077720207254 }, { "epoch": 1.3069876438005965, "grad_norm": 56.01492079179995, "learning_rate": 3.821773601029413e-07, "loss": 0.3176, "step": 6135, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9428571428571428, "success_rate.epoch.env.logic": 0.9080188679245284, "success_rate.epoch.env.math": 0.9717791411042945, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8059964726631393, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8678317434278079, "success_rate.epoch.global": 0.8890048712595685, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971751412429378, "tokens_p.mean_in_band": 0.8035714285714286, "tokens_rate.above_band": 0.9619565217391305, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03804347826086957 }, { "epoch": 1.308052833404346, "grad_norm": 121.03762223482265, "learning_rate": 3.82148933324895e-07, "loss": 0.3884, "step": 6140, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9436619718309859, "success_rate.epoch.env.logic": 0.908235294117647, "success_rate.epoch.env.math": 0.9706601466992665, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8056288478452067, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8678001139510895, "success_rate.epoch.global": 0.8886962552011096, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957749695493301, "tokens_p.mean_in_band": 0.5101776123046875, "tokens_rate.above_band": 0.9903498190591074, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009650180940892641 }, { "epoch": 1.3091180230080954, "grad_norm": 52.06903795901144, "learning_rate": 3.8212049040486525e-07, "loss": 0.1936, "step": 6145, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9088785046728972, "success_rate.epoch.env.math": 0.9708029197080292, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8059701492537313, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8679384100599528, "success_rate.epoch.global": 0.8890808569454043, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974926189649184, "tokens_p.mean_in_band": 0.79296875, "tokens_rate.above_band": 0.9996527777777777, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00034722222222222224 }, { "epoch": 1.310183212611845, "grad_norm": 83.59712480675947, "learning_rate": 3.8209203136358107e-07, "loss": 0.2384, "step": 6150, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9066666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9093023255813953, "success_rate.epoch.env.math": 0.9709090909090909, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8057742782152231, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8679687847027757, "success_rate.epoch.global": 0.8891184573002755, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940476190476191, "tokens_p.mean_in_band": 0.5809151785714286, "tokens_rate.above_band": 0.9473684210526315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05263157894736842 }, { "epoch": 1.3112484022155944, "grad_norm": 153.94520943657272, "learning_rate": 3.8206355622178314e-07, "loss": 0.2335, "step": 6155, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9078947368421053, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9076212471131639, "success_rate.epoch.env.math": 0.9710144927536232, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.806282722513089, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867983406325467, "success_rate.epoch.global": 0.8891557995881949, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965625, "tokens_p.mean_in_band": 0.7165178571428571, "tokens_rate.above_band": 0.9861932938856016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013806706114398421 }, { "epoch": 1.3123135918193438, "grad_norm": 380.4709944951116, "learning_rate": 3.8203506500022403e-07, "loss": 0.3757, "step": 6160, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9078947368421053, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9076212471131639, "success_rate.epoch.env.math": 0.9711191335740073, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8064236111111112, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8680532240943865, "success_rate.epoch.global": 0.8891928864569083, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995697463768116, "tokens_p.mean_in_band": 0.6383928571428571, "tokens_rate.above_band": 0.9517241379310345, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04827586206896552 }, { "epoch": 1.3133787814230933, "grad_norm": 312.2317876408597, "learning_rate": 3.8200655771966785e-07, "loss": 0.3792, "step": 6165, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.908256880733945, "success_rate.epoch.env.math": 0.9700239808153477, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8058925476603119, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8681778687148981, "success_rate.epoch.global": 0.8888888888888888, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9995321856287425, "tokens_p.mean_in_band": 0.6371527777777778, "tokens_rate.above_band": 0.9867060561299852, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013293943870014771 }, { "epoch": 1.3144439710268427, "grad_norm": 42.039312474474656, "learning_rate": 3.8197803440089063e-07, "loss": 0.3141, "step": 6170, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.908675799086758, "success_rate.epoch.env.math": 0.9702026221692491, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8053633217993079, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8682305104490379, "success_rate.epoch.global": 0.8889266304347826, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993573264781491, "tokens_p.mean_in_band": 0.4270833333333333, "tokens_rate.above_band": 0.9848101265822785, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015189873417721518 }, { "epoch": 1.3155091606305922, "grad_norm": 85.99984471279807, "learning_rate": 3.8194949506467983e-07, "loss": 0.2177, "step": 6175, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.909297052154195, "success_rate.epoch.env.math": 0.9703087885985748, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8048359240069085, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676545171755783, "success_rate.epoch.global": 0.8886255924170616, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979707792207793, "tokens_p.mean_in_band": 0.6045386904761905, "tokens_rate.above_band": 0.88, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12 }, { "epoch": 1.3165743502343417, "grad_norm": 115.68674587278218, "learning_rate": 3.819209397318347e-07, "loss": 0.2165, "step": 6180, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.909706546275395, "success_rate.epoch.env.math": 0.9703791469194313, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8046471600688468, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677368921487848, "success_rate.epoch.global": 0.888663967611336, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973341232227488, "tokens_p.mean_in_band": 0.509765625, "tokens_rate.above_band": 0.9547511312217195, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04524886877828054 }, { "epoch": 1.3176395398380911, "grad_norm": 200.31168383725677, "learning_rate": 3.818923684231663e-07, "loss": 0.1614, "step": 6185, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9102564102564102, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.9101123595505618, "success_rate.epoch.env.math": 0.9705535924617197, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8041237113402062, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677773749194497, "success_rate.epoch.global": 0.8887020847343645, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996331828442438, "tokens_p.mean_in_band": 0.6244419642857143, "tokens_rate.above_band": 0.9693654266958425, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030634573304157548 }, { "epoch": 1.3187047294418406, "grad_norm": 448.88518888445986, "learning_rate": 3.818637811594971e-07, "loss": 0.4243, "step": 6190, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8987341772151899, "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.9101123595505618, "success_rate.epoch.env.math": 0.9706227967097533, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8042735042735043, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667942080674962, "success_rate.epoch.global": 0.8884048257372654, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978932584269663, "tokens_p.mean_in_band": 0.5501327514648438, "tokens_rate.above_band": 0.9910913140311804, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008908685968819599 }, { "epoch": 1.31976991904559, "grad_norm": 135.21994319526834, "learning_rate": 3.818351779616613e-07, "loss": 0.1889, "step": 6195, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8987341772151899, "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9105145413870246, "success_rate.epoch.env.math": 0.9707259953161592, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8040885860306644, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8668581720640854, "success_rate.epoch.global": 0.8884435537742151, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973695286195287, "tokens_p.mean_in_band": 0.4270833333333333, "tokens_rate.above_band": 0.9801980198019802, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019801980198019802 }, { "epoch": 1.3208351086493395, "grad_norm": 114.61613603186188, "learning_rate": 3.818065588505047e-07, "loss": 0.2364, "step": 6200, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9107142857142857, "success_rate.epoch.env.math": 0.970828471411902, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8045879354290569, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8665144847358884, "success_rate.epoch.global": 0.8884820239680427, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992481203007518, "tokens_p.mean_in_band": 0.6166666666666667, "tokens_rate.above_band": 0.9779411764705882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022058823529411766 }, { "epoch": 1.321900298253089, "grad_norm": 174.49326375883442, "learning_rate": 3.817779238468847e-07, "loss": 0.2573, "step": 6205, "success_rate.epoch.env.abd": 0.9849624060150376, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9565217391304348, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9107142857142857, "success_rate.epoch.env.math": 0.9709302325581395, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8037225042301185, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8664988514813732, "success_rate.epoch.global": 0.8881884538818845, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970154494382022, "tokens_p.mean_in_band": 0.5562855113636364, "tokens_rate.above_band": 0.9798165137614679, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02018348623853211 }, { "epoch": 1.3229654878568384, "grad_norm": 95.52428811038516, "learning_rate": 3.817492729716704e-07, "loss": 0.3759, "step": 6210, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9086859688195991, "success_rate.epoch.env.math": 0.9710312862108922, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8042194092827004, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8664631180305989, "success_rate.epoch.global": 0.8882275132275133, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995213347921226, "tokens_p.mean_in_band": 0.51484375, "tokens_rate.above_band": 0.9682203389830508, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03177966101694915 }, { "epoch": 1.3240306774605881, "grad_norm": 138.64735888129488, "learning_rate": 3.817206062457422e-07, "loss": 0.2409, "step": 6215, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9710982658959537, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8043660789252729, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8665193534486849, "success_rate.epoch.global": 0.8882663150955834, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949048913043478, "tokens_p.mean_in_band": 0.19791666666666666, "tokens_rate.above_band": 0.9387755102040817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061224489795918366 }, { "epoch": 1.3250958670643374, "grad_norm": 113.2946221991507, "learning_rate": 3.816919236899922e-07, "loss": 0.3553, "step": 6220, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9092920353982301, "success_rate.epoch.env.math": 0.9710982658959537, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8046744574290484, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8666164435816008, "success_rate.epoch.global": 0.8883048620236531, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955110497237569, "tokens_p.mean_in_band": 0.5765625, "tokens_rate.above_band": 0.9731182795698925, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026881720430107527 }, { "epoch": 1.326161056668087, "grad_norm": 36.61378744028113, "learning_rate": 3.8166322532532417e-07, "loss": 0.3132, "step": 6225, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9100877192982456, "success_rate.epoch.env.math": 0.9711981566820277, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8051623646960866, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667422146682487, "success_rate.epoch.global": 0.8886705959397512, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9958609271523179, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9741935483870968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025806451612903226 }, { "epoch": 1.3272262462718363, "grad_norm": 307.7916649063872, "learning_rate": 3.816345111726532e-07, "loss": 0.229, "step": 6230, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9452054794520548, "success_rate.epoch.env.logic": 0.9104803493449781, "success_rate.epoch.env.math": 0.9712313003452244, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8061309030654515, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.866903324243199, "success_rate.epoch.global": 0.8890339425587467, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966603053435115, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9980952380952381, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0019047619047619048 }, { "epoch": 1.328291435875586, "grad_norm": 100.5949741906352, "learning_rate": 3.8160578125290586e-07, "loss": 0.2162, "step": 6235, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9455782312925171, "success_rate.epoch.env.logic": 0.9084967320261438, "success_rate.epoch.env.math": 0.9712973593570609, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8067712634186622, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8670406365125523, "success_rate.epoch.global": 0.8890696161353285, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998059866962306, "tokens_p.mean_in_band": 0.59765625, "tokens_rate.above_band": 0.9825708061002179, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017429193899782137 }, { "epoch": 1.3293566254793352, "grad_norm": 48.48533929342405, "learning_rate": 3.8157703558702046e-07, "loss": 0.189, "step": 6240, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9455782312925171, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9713631156930126, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8074074074074075, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671975120265581, "success_rate.epoch.global": 0.8894293125810635, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996415770609319, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.998211091234347, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0017889087656529517 }, { "epoch": 1.3304218150830849, "grad_norm": 95.20059290147496, "learning_rate": 3.8154827419594663e-07, "loss": 0.1553, "step": 6245, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9096774193548387, "success_rate.epoch.env.math": 0.9714285714285714, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8080393765381461, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673476620068906, "success_rate.epoch.global": 0.8897866839043309, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964574898785425, "tokens_p.mean_in_band": 0.716796875, "tokens_rate.above_band": 0.9973082099596231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0026917900403768506 }, { "epoch": 1.3314870046868341, "grad_norm": 160.47813336048813, "learning_rate": 3.815194971006454e-07, "loss": 0.2896, "step": 6250, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9463087248322147, "success_rate.epoch.env.logic": 0.9098712446351931, "success_rate.epoch.env.math": 0.9714611872146118, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8086672117743254, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675162180731746, "success_rate.epoch.global": 0.8901417525773195, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995567375886525, "tokens_p.mean_in_band": 0.7443181818181818, "tokens_rate.above_band": 0.9871645274212368, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012835472578763127 }, { "epoch": 1.3325521942905838, "grad_norm": 108.29357907157748, "learning_rate": 3.814907043220893e-07, "loss": 0.3148, "step": 6255, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9595959595959596, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9100642398286938, "success_rate.epoch.env.math": 0.9704209328782708, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8083197389885808, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669991579038747, "success_rate.epoch.global": 0.8895311496467566, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9960339168490153, "tokens_p.mean_in_band": 0.5755208333333334, "tokens_rate.above_band": 0.9326530612244898, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0673469387755102 }, { "epoch": 1.3336173838943333, "grad_norm": 164.84188773736184, "learning_rate": 3.814618958812623e-07, "loss": 0.2622, "step": 6260, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9102564102564102, "success_rate.epoch.env.math": 0.970554926387316, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8087876322213181, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671080758651055, "success_rate.epoch.global": 0.8898847631241997, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985576923076923, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.3346825734980827, "grad_norm": 76.10561458682768, "learning_rate": 3.8143307179915983e-07, "loss": 0.2313, "step": 6265, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9106382978723404, "success_rate.epoch.env.math": 0.9707207207207207, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8084415584415584, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671264038805215, "success_rate.epoch.global": 0.8899170389278876, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979967948717948, "tokens_p.mean_in_band": 0.53515625, "tokens_rate.above_band": 0.9811320754716981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018867924528301886 }, { "epoch": 1.3357477631018322, "grad_norm": 79.28571324802336, "learning_rate": 3.8140423209678867e-07, "loss": 0.312, "step": 6270, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9110169491525424, "success_rate.epoch.env.math": 0.9707536557930259, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8090614886731392, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673780321179833, "success_rate.epoch.global": 0.8902671755725191, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975369458128078, "tokens_p.mean_in_band": 0.388671875, "tokens_rate.above_band": 0.9983606557377049, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001639344262295082 }, { "epoch": 1.3368129527055816, "grad_norm": 55.050706905604095, "learning_rate": 3.81375376795167e-07, "loss": 0.2543, "step": 6275, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9092827004219409, "success_rate.epoch.env.math": 0.9708520179372198, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8096774193548387, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8672947874370602, "success_rate.epoch.global": 0.8902980342422321, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0011363636363637, "tokens_p.mean_in_band": 0.49075520833333336, "tokens_rate.above_band": 0.952755905511811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047244094488188976 }, { "epoch": 1.337878142309331, "grad_norm": 144.94517013224367, "learning_rate": 3.813465059153243e-07, "loss": 0.2111, "step": 6280, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9094736842105263, "success_rate.epoch.env.math": 0.9709497206703911, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8102893890675241, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8674181176225477, "success_rate.epoch.global": 0.8906447534766119, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977106227106227, "tokens_p.mean_in_band": 0.8203125, "tokens_rate.above_band": 0.9927272727272727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007272727272727273 }, { "epoch": 1.3389433319130806, "grad_norm": 91.05019917995585, "learning_rate": 3.813176194783015e-07, "loss": 0.5581, "step": 6285, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9100418410041841, "success_rate.epoch.env.math": 0.9710144927536232, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8100961538461539, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626734007778343, "success_rate.epoch.global": 0.8903591682419659, "success_rate.window.env.babyai": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9917763157894737, "tokens_p.mean_below_band": 1.8189894035458565e-09, "tokens_p.mean_in_band": 0.8000710227272727, "tokens_rate.above_band": 0.9661016949152542, "tokens_rate.below_band": 0.002824858757062147, "tokens_rate.in_band": 0.031073446327683617 }, { "epoch": 1.34000852151683, "grad_norm": 202.13569091379333, "learning_rate": 3.8128871750515076e-07, "loss": 0.2848, "step": 6290, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9012345679012346, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9104166666666667, "success_rate.epoch.env.math": 0.9711751662971175, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8105515587529976, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627634829699088, "success_rate.epoch.global": 0.8907035175879398, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9944852941176471, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.3410737111205795, "grad_norm": 92.92925693652778, "learning_rate": 3.8125980001693577e-07, "loss": 0.3242, "step": 6295, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.891566265060241, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9107883817427386, "success_rate.epoch.env.math": 0.9712070874861573, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8105095541401274, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8619174219527497, "success_rate.epoch.global": 0.8904195366311835, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983054226475279, "tokens_p.mean_in_band": 0.6090494791666666, "tokens_rate.above_band": 0.990521327014218, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009478672985781991 }, { "epoch": 1.342138900724329, "grad_norm": 196.34498670796538, "learning_rate": 3.8123086703473126e-07, "loss": 0.1675, "step": 6300, "success_rate.epoch.env.abd": 0.9858156028368794, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9109730848861284, "success_rate.epoch.env.math": 0.9712707182320443, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8111111111111111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8621529346148148, "success_rate.epoch.global": 0.8907615480649188, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980673862310385, "tokens_p.mean_in_band": 0.76953125, "tokens_rate.above_band": 0.9953542392566783, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004645760743321719 }, { "epoch": 1.3432040903280784, "grad_norm": 99.48808804546846, "learning_rate": 3.8120191857962345e-07, "loss": 0.3759, "step": 6305, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9111570247933884, "success_rate.epoch.env.math": 0.9713340683572216, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8115597783056215, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624097855683808, "success_rate.epoch.global": 0.8911014312383323, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976635514018691, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9981343283582089, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0018656716417910447 }, { "epoch": 1.3442692799318279, "grad_norm": 164.3265206634844, "learning_rate": 3.811729546727097e-07, "loss": 0.1862, "step": 6310, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9477124183006536, "success_rate.epoch.env.logic": 0.911522633744856, "success_rate.epoch.env.math": 0.9713656387665198, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8113654301499605, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624772807144254, "success_rate.epoch.global": 0.8911290322580645, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968944099378882, "tokens_p.mean_in_band": 0.5966796875, "tokens_rate.above_band": 0.9877300613496932, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012269938650306749 }, { "epoch": 1.3453344695355773, "grad_norm": 60.41245540209164, "learning_rate": 3.811439753350988e-07, "loss": 0.1835, "step": 6315, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8941176470588236, "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9117043121149897, "success_rate.epoch.env.math": 0.9714912280701754, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8110236220472441, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625354750821175, "success_rate.epoch.global": 0.891156462585034, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99822695035461, "tokens_p.mean_in_band": 0.4673549107142857, "tokens_rate.above_band": 0.9917936694021102, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008206330597889801 }, { "epoch": 1.3463996591393268, "grad_norm": 172.30674427863735, "learning_rate": 3.8111498058791055e-07, "loss": 0.3184, "step": 6320, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8850574712643678, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9117043121149897, "success_rate.epoch.env.math": 0.9715536105032823, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8116169544740973, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618354572643022, "success_rate.epoch.global": 0.8911837237977805, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993402111324377, "tokens_p.mean_in_band": 0.7966974431818182, "tokens_rate.above_band": 0.9895536562203229, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010446343779677113 }, { "epoch": 1.3474648487430763, "grad_norm": 209.80856495220658, "learning_rate": 3.810859704522762e-07, "loss": 0.2412, "step": 6325, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8850574712643678, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9122448979591836, "success_rate.epoch.env.math": 0.9716466739367503, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8120595144870791, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8619665945058426, "success_rate.epoch.global": 0.8915181315304241, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9945833333333334, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9868421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013157894736842105 }, { "epoch": 1.3485300383468257, "grad_norm": 367.3057731166485, "learning_rate": 3.810569449493381e-07, "loss": 0.4214, "step": 6330, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8850574712643678, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9127789046653144, "success_rate.epoch.env.math": 0.9717391304347827, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8115715402658327, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8620381971306211, "success_rate.epoch.global": 0.8915441176470589, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971217105263158, "tokens_p.mean_in_band": 0.615625, "tokens_rate.above_band": 0.9681528662420382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03184713375796178 }, { "epoch": 1.3495952279505752, "grad_norm": 155.3766632946781, "learning_rate": 3.810279041002499e-07, "loss": 0.2507, "step": 6335, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9131313131313131, "success_rate.epoch.env.math": 0.971830985915493, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8113795791114575, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611468181786179, "success_rate.epoch.global": 0.8912645082467929, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978632478632479, "tokens_p.mean_in_band": 0.5912388392857143, "tokens_rate.above_band": 0.970954356846473, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029045643153526972 }, { "epoch": 1.3506604175543246, "grad_norm": 69.71023145249913, "learning_rate": 3.8099884792617625e-07, "loss": 0.2066, "step": 6340, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9131313131313131, "success_rate.epoch.env.math": 0.9718614718614719, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8116731517509728, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86132048769996, "success_rate.epoch.global": 0.8915956151035322, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981966590736523, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.351725607158074, "grad_norm": 140.59910021523186, "learning_rate": 3.809697764482932e-07, "loss": 0.2556, "step": 6345, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9134808853118712, "success_rate.epoch.env.math": 0.9719827586206896, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8122575640031032, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8614164214446788, "success_rate.epoch.global": 0.8919247115968427, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997564935064935, "tokens_p.mean_in_band": 0.6625, "tokens_rate.above_band": 0.9390243902439024, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06097560975609756 }, { "epoch": 1.3527907967618236, "grad_norm": 37.347102505725914, "learning_rate": 3.8094068968778766e-07, "loss": 0.2749, "step": 6350, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.912, "success_rate.epoch.env.math": 0.9720730397422127, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8111455108359134, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8614281415892088, "success_rate.epoch.global": 0.8913438256658596, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7499999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9968527843601895, "tokens_p.mean_in_band": 0.6088005514705882, "tokens_rate.above_band": 0.9254385964912281, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07456140350877193 }, { "epoch": 1.353855986365573, "grad_norm": 112.842414155859, "learning_rate": 3.8091158766585803e-07, "loss": 0.2128, "step": 6355, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9125248508946322, "success_rate.epoch.env.math": 0.9721329046087889, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.8117283950617284, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611013866096741, "success_rate.epoch.global": 0.8913699456849729, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972527472527473, "tokens_p.mean_in_band": 0.6123046875, "tokens_rate.above_band": 0.883495145631068, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11650485436893204 }, { "epoch": 1.3549211759693225, "grad_norm": 86.09852786617513, "learning_rate": 3.808824704037136e-07, "loss": 0.2684, "step": 6360, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9128712871287129, "success_rate.epoch.env.math": 0.9722222222222222, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.8109146810146042, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610670265915278, "success_rate.epoch.global": 0.8910950661853189, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9921875, "tokens_p.mean_in_band": 0.57177734375, "tokens_rate.above_band": 0.9090909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09090909090909091 }, { "epoch": 1.355986365573072, "grad_norm": 157.42789500147703, "learning_rate": 3.808533379225748e-07, "loss": 0.3104, "step": 6365, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.950920245398773, "success_rate.epoch.env.logic": 0.9133858267716536, "success_rate.epoch.env.math": 0.9722814498933902, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.8113496932515337, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612141598591276, "success_rate.epoch.global": 0.8914217156568687, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959016393442623, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.993485342019544, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006514657980456026 }, { "epoch": 1.3570515551768214, "grad_norm": 29.18823663664378, "learning_rate": 3.808241902436731e-07, "loss": 0.2402, "step": 6370, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.8764044943820225, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.950920245398773, "success_rate.epoch.env.logic": 0.913894324853229, "success_rate.epoch.env.math": 0.9723404255319149, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.811017597551645, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861371596319636, "success_rate.epoch.global": 0.8914473684210527, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987258583690987, "tokens_p.mean_in_band": 0.3734375, "tokens_rate.above_band": 0.9893842887473461, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010615711252653927 }, { "epoch": 1.3581167447805709, "grad_norm": 44.11932529402207, "learning_rate": 3.807950273882513e-07, "loss": 0.2602, "step": 6375, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.950920245398773, "success_rate.epoch.env.logic": 0.9140625, "success_rate.epoch.env.math": 0.9724284199363733, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.8114503816793893, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615993784362548, "success_rate.epoch.global": 0.8917710196779964, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994635193133047, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.3591819343843203, "grad_norm": 132.48806301729334, "learning_rate": 3.807658493775629e-07, "loss": 0.2491, "step": 6380, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9142300194931774, "success_rate.epoch.env.math": 0.9714889123548046, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.8118811881188119, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616037030367996, "success_rate.epoch.global": 0.89179548156956, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967749110320284, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.9773913043478261, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022608695652173914 }, { "epoch": 1.3602471239880698, "grad_norm": 38.08547869801414, "learning_rate": 3.8073665623287276e-07, "loss": 0.2254, "step": 6385, "success_rate.epoch.env.abd": 0.9867549668874173, "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9142300194931774, "success_rate.epoch.env.math": 0.9715789473684211, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.812452543659833, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609189864255654, "success_rate.epoch.global": 0.8915234143449912, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957264957264957, "tokens_p.mean_in_band": 0.5834821428571428, "tokens_rate.above_band": 0.8698884758364313, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13011152416356878 }, { "epoch": 1.3613123135918195, "grad_norm": 186.08686477217364, "learning_rate": 3.807074479754565e-07, "loss": 0.2976, "step": 6390, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.914396887159533, "success_rate.epoch.env.math": 0.9716981132075472, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8130204390613172, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610045379967205, "success_rate.epoch.global": 0.8918439716312057, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988738738738738, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9910714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008928571428571428 }, { "epoch": 1.3623775031955687, "grad_norm": 185.12091954140467, "learning_rate": 3.80678224626601e-07, "loss": 0.2925, "step": 6395, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8777777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9127906976744186, "success_rate.epoch.env.math": 0.9717868338557993, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8135849056603773, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609493687508345, "success_rate.epoch.global": 0.8918680023571007, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993602362204724, "tokens_p.mean_in_band": 0.6879595588235294, "tokens_rate.above_band": 0.9739263803680982, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02607361963190184 }, { "epoch": 1.3634426927993184, "grad_norm": 34.22538408383357, "learning_rate": 3.80648986207604e-07, "loss": 0.371, "step": 6400, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8791208791208791, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9127906976744186, "success_rate.epoch.env.math": 0.9719334719334719, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8132530120481928, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610546274607064, "success_rate.epoch.global": 0.8918918918918919, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980440967283073, "tokens_p.mean_in_band": 0.578125, "tokens_rate.above_band": 0.9943422913719944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005657708628005658 }, { "epoch": 1.3645078824030676, "grad_norm": 438.43910397734777, "learning_rate": 3.8061973273977423e-07, "loss": 0.5133, "step": 6405, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8791208791208791, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9131274131274131, "success_rate.epoch.env.math": 0.9720785935884177, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8135338345864662, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611548500949635, "success_rate.epoch.global": 0.8922085530169889, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976756198347108, "tokens_p.mean_in_band": 0.55859375, "tokens_rate.above_band": 0.9918032786885246, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00819672131147541 }, { "epoch": 1.3655730720068173, "grad_norm": 188.1195347317062, "learning_rate": 3.8059046424443146e-07, "loss": 0.2558, "step": 6410, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8791208791208791, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9134615384615384, "success_rate.epoch.env.math": 0.9722222222222222, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8129226145755072, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861366985154048, "success_rate.epoch.global": 0.892231308411215, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988789237668162, "tokens_p.mean_in_band": 0.5125, "tokens_rate.above_band": 0.9780701754385965, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021929824561403508 }, { "epoch": 1.3666382616105666, "grad_norm": 104.15366686163136, "learning_rate": 3.805611807429063e-07, "loss": 0.3664, "step": 6415, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8791208791208791, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9136276391554703, "success_rate.epoch.env.math": 0.9723926380368099, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8125937031484258, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613676765250878, "success_rate.epoch.global": 0.8922539312754805, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9904336734693877, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9333333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06666666666666667 }, { "epoch": 1.3677034512143162, "grad_norm": 434.8637491584342, "learning_rate": 3.805318822565403e-07, "loss": 0.3238, "step": 6420, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9141221374045801, "success_rate.epoch.env.math": 0.9724208375893769, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8125466766243465, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615303651401255, "success_rate.epoch.global": 0.8922764227642277, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998014502762431, "tokens_p.mean_in_band": 0.6802455357142857, "tokens_rate.above_band": 0.981029810298103, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018970189701897018 }, { "epoch": 1.3687686408180655, "grad_norm": 58.69290932814623, "learning_rate": 3.8050256880668617e-07, "loss": 0.1743, "step": 6425, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.963302752293578, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9142857142857143, "success_rate.epoch.env.math": 0.9726443768996961, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8126865671641791, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615782748429696, "success_rate.epoch.global": 0.8925883034163289, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978448275862069, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9886363636363636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011363636363636364 }, { "epoch": 1.3698338304218152, "grad_norm": 32.77634286680063, "learning_rate": 3.8047324041470714e-07, "loss": 0.2403, "step": 6430, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9147727272727273, "success_rate.epoch.env.math": 0.9726720647773279, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8125, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861668215331229, "success_rate.epoch.global": 0.8926096997690531, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978165938864629, "tokens_p.mean_in_band": 0.6396484375, "tokens_rate.above_band": 0.9772403982930299, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02275960170697013 }, { "epoch": 1.3708990200255646, "grad_norm": 54.888350228417536, "learning_rate": 3.8044389710197767e-07, "loss": 0.1723, "step": 6435, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9515151515151515, "success_rate.epoch.env.logic": 0.9147727272727273, "success_rate.epoch.env.math": 0.9727547931382442, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8133333333333334, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617783699688907, "success_rate.epoch.global": 0.8929188255613126, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980031948881789, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.990506329113924, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00949367088607595 }, { "epoch": 1.371964209629314, "grad_norm": 53.63212212620064, "learning_rate": 3.8041453888988286e-07, "loss": 0.1363, "step": 6440, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8804347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9149338374291115, "success_rate.epoch.env.math": 0.9728643216080402, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8138847858197932, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618796580155408, "success_rate.epoch.global": 0.8932261768082663, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996345029239766, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9985401459854014, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00145985401459854 }, { "epoch": 1.3730293992330636, "grad_norm": 61.93265022939555, "learning_rate": 3.8038516579981887e-07, "loss": 0.3874, "step": 6445, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8817204301075269, "success_rate.epoch.env.agentgym:sciworld": 0.963963963963964, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9150943396226415, "success_rate.epoch.env.math": 0.973, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8142962417096536, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8620608656495573, "success_rate.epoch.global": 0.8935317687464225, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984497389033943, "tokens_p.mean_in_band": 0.7203125, "tokens_rate.above_band": 0.9871134020618557, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01288659793814433 }, { "epoch": 1.374094588836813, "grad_norm": 181.94511050278481, "learning_rate": 3.8035577785319244e-07, "loss": 0.3039, "step": 6450, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8817204301075269, "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9152542372881356, "success_rate.epoch.env.math": 0.9730807577268196, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8138337012509198, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611012822006194, "success_rate.epoch.global": 0.8929794520547946, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.6428571428571429, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976003490401396, "tokens_p.mean_in_band": 0.5589488636363636, "tokens_rate.above_band": 0.9630252100840336, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03697478991596639 }, { "epoch": 1.3751597784405625, "grad_norm": 68.81424314700064, "learning_rate": 3.803263750714215e-07, "loss": 0.1792, "step": 6455, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9152542372881356, "success_rate.epoch.env.math": 0.9731610337972167, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8145161290322581, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604400534979398, "success_rate.epoch.global": 0.8929994308480365, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968549250535332, "tokens_p.mean_in_band": 0.6298828125, "tokens_rate.above_band": 0.9915074309978769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008492569002123142 }, { "epoch": 1.376224968044312, "grad_norm": 60.39449018273993, "learning_rate": 3.802969574759344e-07, "loss": 0.198, "step": 6460, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9157303370786517, "success_rate.epoch.env.math": 0.9732408325074331, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8135964912280702, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604069862885347, "success_rate.epoch.global": 0.8927355278093076, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965018656716418, "tokens_p.mean_in_band": 0.6957465277777778, "tokens_rate.above_band": 0.9370629370629371, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06293706293706294 }, { "epoch": 1.3772901576480614, "grad_norm": 32.850261336378935, "learning_rate": 3.802675250881706e-07, "loss": 0.2016, "step": 6465, "success_rate.epoch.env.abd": 0.9870967741935484, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9557522123893806, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9158878504672897, "success_rate.epoch.env.math": 0.973293768545994, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8126822157434402, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8605061517548123, "success_rate.epoch.global": 0.8924731182795699, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992744610281924, "tokens_p.mean_in_band": 0.5710227272727273, "tokens_rate.above_band": 0.9820846905537459, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017915309446254073 }, { "epoch": 1.3783553472518109, "grad_norm": 248.38658953855327, "learning_rate": 3.8023807792958015e-07, "loss": 0.4306, "step": 6470, "success_rate.epoch.env.abd": 0.9870967741935484, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9557522123893806, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9162011173184358, "success_rate.epoch.env.math": 0.9733727810650887, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8123636363636364, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8608611398295039, "success_rate.epoch.global": 0.8924943566591422, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982002617801047, "tokens_p.mean_in_band": 0.25, "tokens_rate.above_band": 0.9937565036420395, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006243496357960458 }, { "epoch": 1.3794205368555603, "grad_norm": 123.42322396420471, "learning_rate": 3.8020861602162395e-07, "loss": 0.4671, "step": 6475, "success_rate.epoch.env.abd": 0.9870967741935484, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9520958083832335, "success_rate.epoch.env.logic": 0.9165120593692022, "success_rate.epoch.env.math": 0.9733990147783251, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8129079042784626, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602469487125529, "success_rate.epoch.global": 0.8925154755205402, "success_rate.window.env.agentgym:sciworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991666666666666, "tokens_p.mean_in_band": 0.5836588541666666, "tokens_rate.above_band": 0.9900990099009901, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009900990099009901 }, { "epoch": 1.3804857264593098, "grad_norm": 61.413473100282815, "learning_rate": 3.801791393857736e-07, "loss": 0.4028, "step": 6480, "success_rate.epoch.env.abd": 0.9871794871794872, "success_rate.epoch.env.agentgym:alfworld": 0.8762886597938144, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9165120593692022, "success_rate.epoch.env.math": 0.9734251968503937, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8129963898916968, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604079654818961, "success_rate.epoch.global": 0.8925364758698092, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998581847649919, "tokens_p.mean_in_band": 0.7317708333333334, "tokens_rate.above_band": 0.9903691813804173, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009630818619582664 }, { "epoch": 1.3815509160630592, "grad_norm": 145.8586619870465, "learning_rate": 3.801496480435114e-07, "loss": 0.2628, "step": 6485, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8762886597938144, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9172794117647058, "success_rate.epoch.env.math": 0.9734774066797642, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8132660418168709, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860514408524385, "success_rate.epoch.global": 0.8928371572467824, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964028776978417, "tokens_p.mean_in_band": 0.7965494791666666, "tokens_rate.above_band": 0.9205298013245033, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07947019867549669 }, { "epoch": 1.3826161056668087, "grad_norm": 77.10137556507472, "learning_rate": 3.8012014201633067e-07, "loss": 0.2933, "step": 6490, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9157509157509157, "success_rate.epoch.env.math": 0.9735294117647059, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8129496402877698, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8607184801675282, "success_rate.epoch.global": 0.892578125, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999738219895288, "tokens_p.mean_in_band": 0.4994140625, "tokens_rate.above_band": 0.9794871794871794, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020512820512820513 }, { "epoch": 1.3836812952705582, "grad_norm": 50.64381257559459, "learning_rate": 3.8009062132573487e-07, "loss": 0.7428, "step": 6495, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9157509157509157, "success_rate.epoch.env.math": 0.9735812133072407, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8140200286123033, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8608204974281708, "success_rate.epoch.global": 0.8928770172509738, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917929292929293, "tokens_p.mean_in_band": 0.853515625, "tokens_rate.above_band": 0.9801980198019802, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019801980198019802 }, { "epoch": 1.3847464848743076, "grad_norm": 23.621564802448294, "learning_rate": 3.800610859932387e-07, "loss": 0.4678, "step": 6500, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8613861386138614, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9159049360146252, "success_rate.epoch.env.math": 0.9736842105263158, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.814418272662384, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8592980902789751, "success_rate.epoch.global": 0.8926193118756937, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981269716088328, "tokens_p.mean_in_band": 0.4677734375, "tokens_rate.above_band": 0.9937304075235109, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006269592476489028 }, { "epoch": 1.385811674478057, "grad_norm": 121.00984645607008, "learning_rate": 3.800315360403672e-07, "loss": 0.6042, "step": 6505, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9162112932604736, "success_rate.epoch.env.math": 0.9737609329446064, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8139700641482538, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.858858522965361, "success_rate.epoch.global": 0.8923630326508024, "success_rate.window.env.abd": 0.5, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943633952254642, "tokens_p.mean_below_band": 4.3213367462158203e-07, "tokens_p.mean_in_band": 0.14775089073634204, "tokens_rate.above_band": 0.47125, "tokens_rate.below_band": 0.0025, "tokens_rate.in_band": 0.52625 }, { "epoch": 1.3868768640818065, "grad_norm": 91.12276951054956, "learning_rate": 3.800019714886562e-07, "loss": 0.1535, "step": 6510, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9162112932604736, "success_rate.epoch.env.math": 0.9738372093023255, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.813655761024182, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8592472290668148, "success_rate.epoch.global": 0.8923841059602649, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978885135135135, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9955156950672646, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004484304932735426 }, { "epoch": 1.387942053685556, "grad_norm": 253.19707789653654, "learning_rate": 3.799723923596521e-07, "loss": 0.3439, "step": 6515, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9165154264972777, "success_rate.epoch.env.math": 0.9739382239382239, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8134751773049645, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859267643989859, "success_rate.epoch.global": 0.8924050632911392, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9910714285714286, "tokens_p.mean_in_band": 0.5850694444444444, "tokens_rate.above_band": 0.9032258064516129, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0967741935483871 }, { "epoch": 1.3890072432893055, "grad_norm": 528.5307166821377, "learning_rate": 3.79942798674912e-07, "loss": 0.2511, "step": 6520, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9168173598553345, "success_rate.epoch.env.math": 0.973963355834137, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8141342756183746, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8594994607500381, "success_rate.epoch.global": 0.8927003293084522, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975609756097561, "tokens_p.mean_in_band": 0.7109375, "tokens_rate.above_band": 0.9808612440191388, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019138755980861243 }, { "epoch": 1.390072432893055, "grad_norm": 66.50404543134246, "learning_rate": 3.799131904560035e-07, "loss": 0.2614, "step": 6525, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9171171171171171, "success_rate.epoch.env.math": 0.9740384615384615, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8147887323943662, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.859593035272047, "success_rate.epoch.global": 0.8929939792008758, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9931640625, "tokens_p.mean_in_band": 0.8487723214285714, "tokens_rate.above_band": 0.9580838323353293, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041916167664670656 }, { "epoch": 1.3911376224968044, "grad_norm": 21.33630328405529, "learning_rate": 3.79883567724505e-07, "loss": 0.3121, "step": 6530, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9174147217235189, "success_rate.epoch.env.math": 0.974088291746641, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8151791988756149, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8597506389375315, "success_rate.epoch.global": 0.8932860262008734, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997120596205962, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.3922028121005539, "grad_norm": 84.79092647423097, "learning_rate": 3.7985393050200505e-07, "loss": 0.2504, "step": 6535, "success_rate.epoch.env.abd": 0.98125, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9178571428571428, "success_rate.epoch.env.math": 0.9741379310344828, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.814866760168303, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8598171971100214, "success_rate.epoch.global": 0.8933043004899293, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994969512195122, "tokens_p.mean_in_band": 0.80625, "tokens_rate.above_band": 0.9761904761904762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023809523809523808 }, { "epoch": 1.3932680017043033, "grad_norm": 96.84051352345725, "learning_rate": 3.7982427881010335e-07, "loss": 0.3038, "step": 6540, "success_rate.epoch.env.abd": 0.98125, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9178571428571428, "success_rate.epoch.env.math": 0.9741873804971319, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8152022315202232, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8598521899113458, "success_rate.epoch.global": 0.8933224755700325, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9892146017699115, "tokens_p.mean_in_band": 0.6199776785714286, "tokens_rate.above_band": 0.9416666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058333333333333334 }, { "epoch": 1.3943331913080528, "grad_norm": 171.81048454330602, "learning_rate": 3.7979461267040973e-07, "loss": 0.3358, "step": 6545, "success_rate.epoch.env.abd": 0.9813664596273292, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.9491525423728814, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9185840707964602, "success_rate.epoch.env.math": 0.9742366412213741, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8152022315202232, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86007241788863, "success_rate.epoch.global": 0.8936112615051435, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9998263888888889, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.3953983809118022, "grad_norm": 47.210620468723015, "learning_rate": 3.7976493210454457e-07, "loss": 0.2238, "step": 6550, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9185840707964602, "success_rate.epoch.env.math": 0.9743101807802094, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8150208623087621, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8601501133275978, "success_rate.epoch.global": 0.8936285097192225, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969152360515021, "tokens_p.mean_in_band": 0.7074652777777778, "tokens_rate.above_band": 0.9628099173553719, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0371900826446281 }, { "epoch": 1.3964635705155517, "grad_norm": 292.0422152890326, "learning_rate": 3.7973523713413896e-07, "loss": 0.2168, "step": 6555, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.8691588785046729, "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9185840707964602, "success_rate.epoch.env.math": 0.9744075829383886, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8154059680777238, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603681982922274, "success_rate.epoch.global": 0.8939149165320409, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968112244897959, "tokens_p.mean_in_band": 0.8109375, "tokens_rate.above_band": 0.9915682967959528, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008431703204047217 }, { "epoch": 1.3975287601193012, "grad_norm": 76.40497848136185, "learning_rate": 3.7970552778083433e-07, "loss": 0.2701, "step": 6560, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.8691588785046729, "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9191564147627417, "success_rate.epoch.env.math": 0.9744560075685903, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.815916955017301, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860471085159142, "success_rate.epoch.global": 0.8941997851772288, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984158986175116, "tokens_p.mean_in_band": 0.861328125, "tokens_rate.above_band": 0.9954128440366973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0045871559633027525 }, { "epoch": 1.3985939497230506, "grad_norm": 120.90800533906611, "learning_rate": 3.796758040662827e-07, "loss": 0.1943, "step": 6565, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8691588785046729, "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9195804195804196, "success_rate.epoch.env.math": 0.9744560075685903, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8162983425414365, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602859631293591, "success_rate.epoch.global": 0.8942153186930906, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944607023411371, "tokens_p.mean_in_band": 0.5891544117647058, "tokens_rate.above_band": 0.9462025316455697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05379746835443038 }, { "epoch": 1.3996591393268, "grad_norm": 39.58255868842192, "learning_rate": 3.7964606601214646e-07, "loss": 0.3347, "step": 6570, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8691588785046729, "success_rate.epoch.env.agentgym:sciworld": 0.9508196721311475, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9198606271777003, "success_rate.epoch.env.math": 0.9745042492917847, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8169304886441845, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604102398942569, "success_rate.epoch.global": 0.8944978632478633, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953853626943006, "tokens_p.mean_in_band": 0.759765625, "tokens_rate.above_band": 0.9846938775510204, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015306122448979591 }, { "epoch": 1.4007243289305498, "grad_norm": 115.1670534981098, "learning_rate": 3.7961631364009843e-07, "loss": 0.3397, "step": 6575, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.92, "success_rate.epoch.env.math": 0.9745283018867924, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.817433081674674, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8606647686594546, "success_rate.epoch.global": 0.8947789025039957, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998991935483871, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.9962917181705809, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003708281829419036 }, { "epoch": 1.401789518534299, "grad_norm": 45.89066912212665, "learning_rate": 3.7958654697182207e-07, "loss": 0.236, "step": 6580, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.92, "success_rate.epoch.env.math": 0.9745762711864406, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8176229508196722, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860866052571239, "success_rate.epoch.global": 0.8947927736450585, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9902777777777778, "tokens_p.mean_in_band": 0.4388020833333333, "tokens_rate.above_band": 0.9090909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09090909090909091 }, { "epoch": 1.4028547081380487, "grad_norm": 213.56654429197718, "learning_rate": 3.7955676602901094e-07, "loss": 0.2489, "step": 6585, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9204152249134948, "success_rate.epoch.env.math": 0.974671669793621, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8171896316507503, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8608964261797212, "success_rate.epoch.global": 0.8948065712771595, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969565217391304, "tokens_p.mean_below_band": 1.126900315284729e-07, "tokens_p.mean_in_band": 0.6805555555555556, "tokens_rate.above_band": 0.9829059829059829, "tokens_rate.below_band": 0.0017094017094017094, "tokens_rate.in_band": 0.015384615384615385 }, { "epoch": 1.403919897741798, "grad_norm": 318.4895880693358, "learning_rate": 3.7952697083336933e-07, "loss": 0.3475, "step": 6590, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9516129032258065, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9205526770293609, "success_rate.epoch.env.math": 0.9747899159663865, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8175629680054459, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609893739682678, "success_rate.epoch.global": 0.8950845665961945, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996929190751445, "tokens_p.mean_in_band": 0.810546875, "tokens_rate.above_band": 0.9774011299435028, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022598870056497175 }, { "epoch": 1.4049850873455476, "grad_norm": 194.14735251004694, "learning_rate": 3.7949716140661166e-07, "loss": 0.3152, "step": 6595, "success_rate.epoch.env.abd": 0.9819277108433735, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.952, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9205526770293609, "success_rate.epoch.env.math": 0.9748837209302326, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8172554347826086, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8610251705396682, "success_rate.epoch.global": 0.8950975224037955, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952400662251656, "tokens_p.mean_in_band": 0.67431640625, "tokens_rate.above_band": 0.949685534591195, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050314465408805034 }, { "epoch": 1.4060502769492969, "grad_norm": 77.83894711552718, "learning_rate": 3.7946733777046294e-07, "loss": 0.2131, "step": 6600, "success_rate.epoch.env.abd": 0.9820359281437125, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.952, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9205526770293609, "success_rate.epoch.env.math": 0.9749536178107606, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8166441136671178, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8609857880910661, "success_rate.epoch.global": 0.8948475289169295, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954637096774194, "tokens_p.mean_below_band": 4.4517219066619873e-07, "tokens_p.mean_in_band": 0.6354166666666666, "tokens_rate.above_band": 0.9465648854961832, "tokens_rate.below_band": 0.007633587786259542, "tokens_rate.in_band": 0.04580152671755725 }, { "epoch": 1.4071154665530465, "grad_norm": 63.397528101143806, "learning_rate": 3.7943749994665826e-07, "loss": 0.113, "step": 6605, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8715596330275229, "success_rate.epoch.env.agentgym:sciworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9205526770293609, "success_rate.epoch.env.math": 0.9750462107208873, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8170155300472653, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611804383935682, "success_rate.epoch.global": 0.8951232302045097, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993734335839599, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4081806561567958, "grad_norm": 223.88233976494806, "learning_rate": 3.7940764795694333e-07, "loss": 0.3996, "step": 6610, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8715596330275229, "success_rate.epoch.env.agentgym:sciworld": 0.952755905511811, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9206896551724137, "success_rate.epoch.env.math": 0.9751381215469613, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8168350168350168, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612189228379079, "success_rate.epoch.global": 0.8951359832635983, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990828092243187, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9774590163934426, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022540983606557378 }, { "epoch": 1.4092458457605455, "grad_norm": 313.68536393246194, "learning_rate": 3.793777818230741e-07, "loss": 0.2412, "step": 6615, "success_rate.epoch.env.abd": 0.9822485207100592, "success_rate.epoch.env.agentgym:alfworld": 0.8715596330275229, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9206896551724137, "success_rate.epoch.env.math": 0.9742883379247016, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8173270651443922, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612625950956834, "success_rate.epoch.global": 0.8951486697965572, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0004171914357682, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9987421383647799, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012578616352201257 }, { "epoch": 1.410311035364295, "grad_norm": 94.8747274718839, "learning_rate": 3.7934790156681664e-07, "loss": 0.1694, "step": 6620, "success_rate.epoch.env.abd": 0.9822485207100592, "success_rate.epoch.env.agentgym:alfworld": 0.8727272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9553072625698324, "success_rate.epoch.env.logic": 0.9209621993127147, "success_rate.epoch.env.math": 0.9743354720439963, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8175720992622401, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861586241103208, "success_rate.epoch.global": 0.8954214360041624, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991086862575627, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4113762249680444, "grad_norm": 72.39484835246404, "learning_rate": 3.793180072099476e-07, "loss": 0.1445, "step": 6625, "success_rate.epoch.env.abd": 0.9823529411764705, "success_rate.epoch.env.agentgym:alfworld": 0.8727272727272727, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9213675213675213, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8176943699731903, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617128055596872, "success_rate.epoch.global": 0.8956927867151012, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993564073226545, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9994282447112636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0005717552887364208 }, { "epoch": 1.4124414145717938, "grad_norm": 54.82186480674202, "learning_rate": 3.792880987742537e-07, "loss": 0.3586, "step": 6630, "success_rate.epoch.env.abd": 0.9823529411764705, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9215017064846417, "success_rate.epoch.env.math": 0.9744058500914077, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8180602006688963, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617842551570477, "success_rate.epoch.global": 0.895703933747412, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988001745200699, "tokens_p.mean_in_band": 0.5962611607142857, "tokens_rate.above_band": 0.9761499148211243, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02385008517887564 }, { "epoch": 1.4135066041755433, "grad_norm": 109.14430459485727, "learning_rate": 3.79258176281532e-07, "loss": 0.2295, "step": 6635, "success_rate.epoch.env.abd": 0.9825581395348837, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9200680272108843, "success_rate.epoch.env.math": 0.9745222929936306, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8181818181818182, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616942169297569, "success_rate.epoch.global": 0.8957150232318017, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0013521634615385, "tokens_p.mean_in_band": 0.49383223684210525, "tokens_rate.above_band": 0.9704510108864697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029548989113530325 }, { "epoch": 1.4145717937792928, "grad_norm": 236.2002781402803, "learning_rate": 3.7922823975358987e-07, "loss": 0.4063, "step": 6640, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9204737732656514, "success_rate.epoch.env.math": 0.9745685740236149, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8178785857238159, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617259692735314, "success_rate.epoch.global": 0.8957260556127703, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956210191082803, "tokens_p.mean_in_band": 0.5041852678571429, "tokens_rate.above_band": 0.9573170731707317, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042682926829268296 }, { "epoch": 1.4156369833830422, "grad_norm": 37.2934130103539, "learning_rate": 3.791982892122448e-07, "loss": 0.2961, "step": 6645, "success_rate.epoch.env.abd": 0.9828571428571429, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9204737732656514, "success_rate.epoch.env.math": 0.9746376811594203, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8170326014637392, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617077354671402, "success_rate.epoch.global": 0.8954802259887006, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987359550561797, "tokens_p.mean_in_band": 0.50439453125, "tokens_rate.above_band": 0.96529284164859, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03470715835140998 }, { "epoch": 1.4167021729867917, "grad_norm": 260.61858205434055, "learning_rate": 3.7916832467932453e-07, "loss": 0.1875, "step": 6650, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9204737732656514, "success_rate.epoch.env.math": 0.9746835443037974, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8163129973474801, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616854610357696, "success_rate.epoch.global": 0.8952356557377049, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954637096774194, "tokens_p.mean_in_band": 0.5845170454545454, "tokens_rate.above_band": 0.9712793733681462, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028720626631853787 }, { "epoch": 1.4177673625905411, "grad_norm": 10.648756138245203, "learning_rate": 3.791383461766669e-07, "loss": 0.2567, "step": 6655, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9191919191919192, "success_rate.epoch.env.math": 0.974706413730804, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8166776968894772, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617385665319229, "success_rate.epoch.global": 0.8952478283086357, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988435660218671, "tokens_p.mean_in_band": 0.5840567129629629, "tokens_rate.above_band": 0.977796052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022203947368421052 }, { "epoch": 1.4188325521942906, "grad_norm": 312.8146916147841, "learning_rate": 3.791083537261202e-07, "loss": 0.2469, "step": 6660, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9195979899497487, "success_rate.epoch.env.math": 0.9738503155996393, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8172823218997362, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617526208625523, "success_rate.epoch.global": 0.8952599388379205, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929315476190477, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9824561403508771, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017543859649122806 }, { "epoch": 1.41989774179804, "grad_norm": 230.53629946637415, "learning_rate": 3.790783473495425e-07, "loss": 0.1912, "step": 6665, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9518716577540107, "success_rate.epoch.env.logic": 0.919732441471572, "success_rate.epoch.env.math": 0.9739208633093526, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8177631578947369, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613708696101697, "success_rate.epoch.global": 0.8952719877986782, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956005204053684, "tokens_p.mean_below_band": 1.525040715932846e-08, "tokens_p.mean_in_band": 0.5325520833333334, "tokens_rate.above_band": 0.8959509202453988, "tokens_rate.below_band": 0.000245398773006135, "tokens_rate.in_band": 0.10380368098159509 }, { "epoch": 1.4209629314017895, "grad_norm": 216.84921308729815, "learning_rate": 3.7904832706880244e-07, "loss": 0.3145, "step": 6670, "success_rate.epoch.env.abd": 0.9831460674157303, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9518716577540107, "success_rate.epoch.env.logic": 0.92, "success_rate.epoch.env.math": 0.9740143369175627, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8181221273801708, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8614449806629131, "success_rate.epoch.global": 0.8955375253549696, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9894021739130435, "tokens_p.mean_in_band": 0.685546875, "tokens_rate.above_band": 0.9829059829059829, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017094017094017096 }, { "epoch": 1.422028121005539, "grad_norm": 159.3070869283212, "learning_rate": 3.790182929057785e-07, "loss": 0.1988, "step": 6675, "success_rate.epoch.env.abd": 0.9831460674157303, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.92, "success_rate.epoch.env.math": 0.9741532976827094, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8171690694626474, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613942447236476, "success_rate.epoch.global": 0.8952959028831563, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978316326530612, "tokens_p.mean_in_band": 0.6150173611111112, "tokens_rate.above_band": 0.9645669291338582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03543307086614173 }, { "epoch": 1.4230933106092885, "grad_norm": 148.83961974942127, "learning_rate": 3.789882448823593e-07, "loss": 0.304, "step": 6680, "success_rate.epoch.env.abd": 0.9832402234636871, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.92, "success_rate.epoch.env.math": 0.9742222222222222, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8171129980404964, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8614269994483568, "success_rate.epoch.global": 0.8953077699293642, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944968553459119, "tokens_p.mean_in_band": 0.5299479166666666, "tokens_rate.above_band": 0.9137931034482759, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08620689655172414 }, { "epoch": 1.424158500213038, "grad_norm": 106.54259448152446, "learning_rate": 3.7895818302044375e-07, "loss": 0.2162, "step": 6685, "success_rate.epoch.env.abd": 0.9832402234636871, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9188741721854304, "success_rate.epoch.env.math": 0.974267968056788, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.817351598173516, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8614048290831243, "success_rate.epoch.global": 0.895319577252139, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976531620553359, "tokens_p.mean_in_band": 0.7421875, "tokens_rate.above_band": 0.9902152641878669, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009784735812133072 }, { "epoch": 1.4252236898167874, "grad_norm": 25.894717203224385, "learning_rate": 3.789281073419406e-07, "loss": 0.2225, "step": 6690, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9188741721854304, "success_rate.epoch.env.math": 0.974267968056788, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8184176394293126, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615102064581648, "success_rate.epoch.global": 0.8955823293172691, "success_rate.window.env.abd": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951746323529411, "tokens_p.mean_in_band": 0.740234375, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 1.4262888794205368, "grad_norm": 94.23598100955417, "learning_rate": 3.788980178687689e-07, "loss": 0.346, "step": 6695, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9191419141914191, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8182406209573092, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615267273523624, "success_rate.epoch.global": 0.8955933900851277, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915540540540541, "tokens_p.mean_in_band": 0.294921875, "tokens_rate.above_band": 0.9866666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013333333333333334 }, { "epoch": 1.4273540690242863, "grad_norm": 73.87194091231333, "learning_rate": 3.7886791462285753e-07, "loss": 0.2362, "step": 6700, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9528795811518325, "success_rate.epoch.env.logic": 0.9194078947368421, "success_rate.epoch.env.math": 0.9744493392070485, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8178294573643411, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616562465431431, "success_rate.epoch.global": 0.8956043956043956, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975671140939597, "tokens_p.mean_in_band": 0.6646205357142857, "tokens_rate.above_band": 0.9906914893617021, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009308510638297872 }, { "epoch": 1.4284192586280358, "grad_norm": 152.62958825739872, "learning_rate": 3.788377976261456e-07, "loss": 0.2612, "step": 6705, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9198036006546645, "success_rate.epoch.env.math": 0.9744942832014072, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8170103092783505, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616441484226303, "success_rate.epoch.global": 0.8953662182361734, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974343185550082, "tokens_p.mean_in_band": 0.677734375, "tokens_rate.above_band": 0.9744, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0256 }, { "epoch": 1.4294844482317852, "grad_norm": 176.73531538762126, "learning_rate": 3.788076669005821e-07, "loss": 0.2004, "step": 6710, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9199346405228758, "success_rate.epoch.env.math": 0.9745836985100789, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.817363344051447, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617968470262888, "success_rate.epoch.global": 0.8956262425447317, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978595890410958, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9984802431610942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001519756838905775 }, { "epoch": 1.4305496378355347, "grad_norm": 110.1182691765544, "learning_rate": 3.787775224681261e-07, "loss": 0.2539, "step": 6715, "success_rate.epoch.env.abd": 0.9834254143646409, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9533678756476683, "success_rate.epoch.env.logic": 0.9201954397394136, "success_rate.epoch.env.math": 0.9746724890829694, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8175979447655748, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618804058610645, "success_rate.epoch.global": 0.895884977689638, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981715425531915, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4316148274392841, "grad_norm": 86.2750031817364, "learning_rate": 3.7874736435074675e-07, "loss": 0.2888, "step": 6720, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9533678756476683, "success_rate.epoch.env.logic": 0.9201954397394136, "success_rate.epoch.env.math": 0.9747386759581882, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8175416133162612, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618977693554566, "success_rate.epoch.global": 0.895895153313551, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945211038961039, "tokens_p.mean_in_band": 0.7151988636363636, "tokens_rate.above_band": 0.9333333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06666666666666667 }, { "epoch": 1.4326800170430336, "grad_norm": 518.0878196517226, "learning_rate": 3.7871719257042293e-07, "loss": 0.3413, "step": 6725, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9204545454545454, "success_rate.epoch.env.math": 0.9748045178105995, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8171355498721228, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8619552821853471, "success_rate.epoch.global": 0.8959052787370498, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978187106156083, "tokens_p.mean_in_band": 0.732421875, "tokens_rate.above_band": 0.9980648282535075, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0019351717464925011 }, { "epoch": 1.433745206646783, "grad_norm": 198.5140189718881, "learning_rate": 3.7868700714914366e-07, "loss": 0.2179, "step": 6730, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9207119741100324, "success_rate.epoch.env.math": 0.9748263888888888, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8179503500954806, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862054745817814, "success_rate.epoch.global": 0.8961614173228346, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99836867862969, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4348103962505325, "grad_norm": 88.38338475919998, "learning_rate": 3.786568081089079e-07, "loss": 0.1391, "step": 6735, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9209677419354839, "success_rate.epoch.env.math": 0.9749134948096886, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.818297331639136, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8621485293441381, "success_rate.epoch.global": 0.8964162984781542, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964171974522293, "tokens_p.mean_in_band": 0.730078125, "tokens_rate.above_band": 0.9401197604790419, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059880239520958084 }, { "epoch": 1.435875585854282, "grad_norm": 179.15100171992324, "learning_rate": 3.7862659547172443e-07, "loss": 0.3668, "step": 6740, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9209677419354839, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8189873417721519, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622191216462588, "success_rate.epoch.global": 0.8966699314397649, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9923664122137404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007633587786259542 }, { "epoch": 1.4369407754580314, "grad_norm": 99.12112667556465, "learning_rate": 3.78596369259612e-07, "loss": 0.2861, "step": 6745, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.9212218649517685, "success_rate.epoch.env.math": 0.9750215331610681, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.819672131147541, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623276247109736, "success_rate.epoch.global": 0.8969223253541768, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970128676470589, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9927007299270073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0072992700729927005 }, { "epoch": 1.4380059650617811, "grad_norm": 255.09485764190708, "learning_rate": 3.785661294945994e-07, "loss": 0.2561, "step": 6750, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.92, "success_rate.epoch.env.math": 0.9750859106529209, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8200125865324103, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622839547543241, "success_rate.epoch.global": 0.8969298245614035, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946120689655172, "tokens_p.mean_in_band": 0.6243489583333334, "tokens_rate.above_band": 0.9354838709677419, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06451612903225806 }, { "epoch": 1.4390711546655304, "grad_norm": 156.33161177317115, "learning_rate": 3.7853587619872496e-07, "loss": 0.5939, "step": 6755, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8761061946902655, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.9202551834130781, "success_rate.epoch.env.math": 0.9751499571550986, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8193224592220828, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622502368093177, "success_rate.epoch.global": 0.8966942148760331, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9929123711340206, "tokens_p.mean_in_band": 0.50225830078125, "tokens_rate.above_band": 0.9065420560747663, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09345794392523364 }, { "epoch": 1.44013634426928, "grad_norm": 97.21188495845038, "learning_rate": 3.7850560939403716e-07, "loss": 0.1818, "step": 6760, "success_rate.epoch.env.abd": 0.9837837837837838, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9202551834130781, "success_rate.epoch.env.math": 0.9743150684931506, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8197747183979975, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616583835259214, "success_rate.epoch.global": 0.8964597478176528, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994347545219638, "tokens_p.mean_in_band": 0.58984375, "tokens_rate.above_band": 0.9948586118251928, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005141388174807198 }, { "epoch": 1.4412015338730293, "grad_norm": 201.73881886585724, "learning_rate": 3.7847532910259425e-07, "loss": 0.2903, "step": 6765, "success_rate.epoch.env.abd": 0.9837837837837838, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.919047619047619, "success_rate.epoch.env.math": 0.9743808710503843, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8202247191011236, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615954961527307, "success_rate.epoch.global": 0.8964683115626512, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0003355061349692, "tokens_p.mean_in_band": 0.52125, "tokens_rate.above_band": 0.9630723781388478, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03692762186115214 }, { "epoch": 1.442266723476779, "grad_norm": 41.393958439182995, "learning_rate": 3.7844503534646426e-07, "loss": 0.2617, "step": 6770, "success_rate.epoch.env.abd": 0.9837837837837838, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9177215189873418, "success_rate.epoch.env.math": 0.9744245524296675, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.820784069695084, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615505276722074, "success_rate.epoch.global": 0.896476833976834, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997803514376997, "tokens_p.mean_in_band": 0.47709517045454547, "tokens_rate.above_band": 0.9861373660995589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013862633900441084 }, { "epoch": 1.4433319130805282, "grad_norm": 68.68731784915566, "learning_rate": 3.78414728147725e-07, "loss": 0.1593, "step": 6775, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9177215189873418, "success_rate.epoch.env.math": 0.9744680851063829, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8215613382899628, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616330717840025, "success_rate.epoch.global": 0.8967260471834376, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9921875, "tokens_p.mean_in_band": 0.8681640625, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 1.444397102684278, "grad_norm": 68.1099058041425, "learning_rate": 3.7838440752846426e-07, "loss": 0.2981, "step": 6780, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8620689655172413, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9162717219589257, "success_rate.epoch.env.math": 0.9745114698385726, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8218923933209648, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861085753547292, "success_rate.epoch.global": 0.8964937560038425, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958477135461605, "tokens_p.mean_in_band": 0.7134765625, "tokens_rate.above_band": 0.9914456800684346, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00855431993156544 }, { "epoch": 1.4454622922880271, "grad_norm": 32.46617417449706, "learning_rate": 3.783540735107794e-07, "loss": 0.3274, "step": 6785, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.916403785488959, "success_rate.epoch.env.math": 0.9745977984758679, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8223318938926588, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612527344990778, "success_rate.epoch.global": 0.896741734547197, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9944852941176471, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9870967741935484, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012903225806451613 }, { "epoch": 1.4465274818917768, "grad_norm": 69.98733159466664, "learning_rate": 3.7832372611677766e-07, "loss": 0.2678, "step": 6790, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, "success_rate.epoch.env.agentgym:sciworld": 0.9558823529411765, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.916403785488959, "success_rate.epoch.env.math": 0.9738396624472574, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8220443349753694, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.861228290289078, "success_rate.epoch.global": 0.8965105162523901, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9956232492997199, "tokens_p.mean_in_band": 0.5857319078947368, "tokens_rate.above_band": 0.949468085106383, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05053191489361702 }, { "epoch": 1.4475926714955263, "grad_norm": 67.61420977236945, "learning_rate": 3.78293365368576e-07, "loss": 0.2082, "step": 6795, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9165354330708662, "success_rate.epoch.env.math": 0.9738617200674536, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8223724646588814, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615381999030745, "success_rate.epoch.global": 0.896757272293753, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987348178137652, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4486578610992757, "grad_norm": 541.1037833542614, "learning_rate": 3.7826299128830116e-07, "loss": 0.5228, "step": 6800, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9169278996865203, "success_rate.epoch.env.math": 0.9739276703111859, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8226993865030675, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616174353769814, "success_rate.epoch.global": 0.8970028544243578, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_p.mean_in_band": 0.8623046875, "tokens_rate.above_band": 0.9545454545454546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045454545454545456 }, { "epoch": 1.4497230507030252, "grad_norm": 83.02353934640807, "learning_rate": 3.782326038980895e-07, "loss": 0.3023, "step": 6805, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.8632478632478633, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.917057902973396, "success_rate.epoch.env.math": 0.9740150880134115, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8219094247246022, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616570284430509, "success_rate.epoch.global": 0.8967726625533935, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952380952380953, "tokens_p.mean_in_band": 0.41268382352941174, "tokens_rate.above_band": 0.9251101321585903, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07488986784140969 }, { "epoch": 1.4507882403067747, "grad_norm": 239.44035564636363, "learning_rate": 3.782022032200871e-07, "loss": 0.3865, "step": 6810, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9173166926677067, "success_rate.epoch.env.math": 0.9740802675585284, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8215158924205379, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618038437770271, "success_rate.epoch.global": 0.896780303030303, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989669421487604, "tokens_p.mean_in_band": 0.3152043269230769, "tokens_rate.above_band": 0.9789644012944984, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021035598705501618 }, { "epoch": 1.4518534299105241, "grad_norm": 114.94718312539281, "learning_rate": 3.7817178927644983e-07, "loss": 0.1674, "step": 6815, "success_rate.epoch.env.abd": 0.9842105263157894, "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9174454828660437, "success_rate.epoch.env.math": 0.9741019214703425, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8220597196831201, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618899872713167, "success_rate.epoch.global": 0.8970240906943788, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984903381642513, "tokens_p.mean_in_band": 0.7921875, "tokens_rate.above_band": 0.9764150943396226, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02358490566037736 }, { "epoch": 1.4529186195142736, "grad_norm": 164.55254171959837, "learning_rate": 3.7814136208934306e-07, "loss": 0.2339, "step": 6820, "success_rate.epoch.env.abd": 0.9842931937172775, "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9174454828660437, "success_rate.epoch.env.math": 0.974188176519567, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8218844984802431, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612290580649234, "success_rate.epoch.global": 0.8967954759660698, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977064220183486, "tokens_p.mean_in_band": 0.6595052083333334, "tokens_rate.above_band": 0.9732142857142857, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026785714285714284 }, { "epoch": 1.453983809118023, "grad_norm": 78.15388922492372, "learning_rate": 3.78110921680942e-07, "loss": 0.3585, "step": 6825, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9178294573643411, "success_rate.epoch.env.math": 0.9742096505823628, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8218181818181818, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612673251723557, "success_rate.epoch.global": 0.8968030089327692, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956521739130435, "tokens_p.mean_in_band": 0.46205357142857145, "tokens_rate.above_band": 0.9426229508196722, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05737704918032787 }, { "epoch": 1.4550489987217725, "grad_norm": 123.26141996969243, "learning_rate": 3.780804680734314e-07, "loss": 0.3277, "step": 6830, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9180834621329211, "success_rate.epoch.env.math": 0.9734660033167496, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.822141560798548, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613604354154293, "success_rate.epoch.global": 0.8968105065666041, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985283933518005, "tokens_p.mean_in_band": 0.6070963541666666, "tokens_rate.above_band": 0.967828418230563, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032171581769437 }, { "epoch": 1.456114188325522, "grad_norm": 100.71200498254407, "learning_rate": 3.780500012890056e-07, "loss": 0.2128, "step": 6835, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9556650246305419, "success_rate.epoch.env.logic": 0.9185867895545314, "success_rate.epoch.env.math": 0.9735099337748344, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8223564954682779, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615357665356549, "success_rate.epoch.global": 0.8970519419747309, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999379652605459, "tokens_p.mean_in_band": 0.833984375, "tokens_rate.above_band": 0.9983484723369116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016515276630883566 }, { "epoch": 1.4571793779292714, "grad_norm": 210.8350261682488, "learning_rate": 3.7801952134986855e-07, "loss": 0.3259, "step": 6840, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9187116564417178, "success_rate.epoch.env.math": 0.9735099337748344, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8228915662650602, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612403932716223, "success_rate.epoch.global": 0.8970588235294118, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.6666666666666666, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948628048780488, "tokens_p.mean_in_band": 0.6137806792237442, "tokens_rate.above_band": 0.9034817100044072, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09651828999559277 }, { "epoch": 1.458244567533021, "grad_norm": 87.25010672042312, "learning_rate": 3.779890282782339e-07, "loss": 0.3844, "step": 6845, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9188361408882083, "success_rate.epoch.env.math": 0.9735318444995864, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8225419664268585, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612219201200805, "success_rate.epoch.global": 0.8968327899394504, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.990234375, "tokens_p.mean_in_band": 0.7053125, "tokens_rate.above_band": 0.8175182481751825, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18248175182481752 }, { "epoch": 1.4593097571367704, "grad_norm": 171.02015220767652, "learning_rate": 3.7795852209632455e-07, "loss": 0.2679, "step": 6850, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9190839694656489, "success_rate.epoch.env.math": 0.9736191261335532, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8223684210526315, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8612366078325512, "success_rate.epoch.global": 0.8968401486988847, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9922816265060241, "tokens_p.mean_in_band": 0.6792279411764706, "tokens_rate.above_band": 0.907103825136612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09289617486338798 }, { "epoch": 1.4603749467405198, "grad_norm": 52.691519408635706, "learning_rate": 3.7792800282637344e-07, "loss": 0.238, "step": 6855, "success_rate.epoch.env.abd": 0.9844559585492227, "success_rate.epoch.env.agentgym:alfworld": 0.8583333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9194528875379939, "success_rate.epoch.env.math": 0.9736625514403292, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8227923627684964, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8613199936183885, "success_rate.epoch.global": 0.8970792767732962, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9936835106382979, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9894736842105263, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010526315789473684 }, { "epoch": 1.4614401363442693, "grad_norm": 94.91942663446012, "learning_rate": 3.7789747049062276e-07, "loss": 0.0882, "step": 6860, "success_rate.epoch.env.abd": 0.9845360824742269, "success_rate.epoch.env.agentgym:alfworld": 0.860655737704918, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9194528875379939, "success_rate.epoch.env.math": 0.9737274220032841, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8231089934484812, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8615730872128912, "success_rate.epoch.global": 0.8973172987974098, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998282967032967, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4625053259480187, "grad_norm": 347.3620080155369, "learning_rate": 3.7786692511132416e-07, "loss": 0.445, "step": 6865, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.860655737704918, "success_rate.epoch.env.agentgym:sciworld": 0.9577464788732394, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9194528875379939, "success_rate.epoch.env.math": 0.9737704918032787, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8237388724035608, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8616627928013145, "success_rate.epoch.global": 0.8975542224273189, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998291015625, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4635705155517682, "grad_norm": 33.484943461277815, "learning_rate": 3.7783636671073894e-07, "loss": 0.2315, "step": 6870, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.860655737704918, "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9195751138088012, "success_rate.epoch.env.math": 0.9738562091503268, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8240521327014217, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617635253809339, "success_rate.epoch.global": 0.8977900552486188, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983231707317073, "tokens_p.mean_in_band": 0.66796875, "tokens_rate.above_band": 0.9808612440191388, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019138755980861243 }, { "epoch": 1.4646357051555177, "grad_norm": 16.238490908343042, "learning_rate": 3.778057953111378e-07, "loss": 0.2136, "step": 6875, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.860655737704918, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9198184568835098, "success_rate.epoch.env.math": 0.9738988580750407, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.824468085106383, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8618605975320118, "success_rate.epoch.global": 0.8980248047772164, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973569651741293, "tokens_p.mean_in_band": 0.8098958333333334, "tokens_rate.above_band": 0.9852941176470589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014705882352941176 }, { "epoch": 1.4657008947592671, "grad_norm": 62.57408491444139, "learning_rate": 3.77775210934801e-07, "loss": 0.2458, "step": 6880, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.8617886178861789, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9519230769230769, "success_rate.epoch.env.logic": 0.9186746987951807, "success_rate.epoch.env.math": 0.9739625711960944, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8245717660956882, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8619217058778478, "success_rate.epoch.global": 0.8980293308890925, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967919340054996, "tokens_p.mean_in_band": 0.5326286764705882, "tokens_rate.above_band": 0.9846570397111913, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015342960288808664 }, { "epoch": 1.4667660843630166, "grad_norm": 117.12791554500112, "learning_rate": 3.7774461360401824e-07, "loss": 0.3792, "step": 6885, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9519230769230769, "success_rate.epoch.env.logic": 0.9186746987951807, "success_rate.epoch.env.math": 0.974025974025974, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8246027074749853, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8620316106168889, "success_rate.epoch.global": 0.8980338363054412, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963321596244131, "tokens_p.mean_in_band": 0.5731026785714286, "tokens_rate.above_band": 0.9681818181818181, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031818181818181815 }, { "epoch": 1.467831273966766, "grad_norm": 53.039999490607684, "learning_rate": 3.7771400334108855e-07, "loss": 0.1108, "step": 6890, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9187969924812031, "success_rate.epoch.env.math": 0.9740680713128039, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.825014679976512, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8621326956155518, "success_rate.epoch.global": 0.8982664233576643, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995575221238938, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9991158267020336, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0008841732979664014 }, { "epoch": 1.4688964635705155, "grad_norm": 121.38925392283164, "learning_rate": 3.776833801683206e-07, "loss": 0.2769, "step": 6895, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.95260663507109, "success_rate.epoch.env.logic": 0.918918918918919, "success_rate.epoch.env.math": 0.9733441033925686, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8254247217340364, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8621357577032921, "success_rate.epoch.global": 0.8982703686845699, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944282945736435, "tokens_p.mean_in_band": 0.5833333333333334, "tokens_rate.above_band": 0.9772727272727273, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022727272727272728 }, { "epoch": 1.469961653174265, "grad_norm": 355.2643071772762, "learning_rate": 3.776527441080322e-07, "loss": 0.2891, "step": 6900, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9194029850746268, "success_rate.epoch.env.math": 0.9733656174334141, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.82583284628872, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622391448268732, "success_rate.epoch.global": 0.8985013623978202, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975454980842912, "tokens_p.mean_in_band": 0.5755208333333334, "tokens_rate.above_band": 0.9886363636363636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011363636363636364 }, { "epoch": 1.4710268427780144, "grad_norm": 1.1010876385696688, "learning_rate": 3.776220951825508e-07, "loss": 0.2222, "step": 6905, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9196428571428571, "success_rate.epoch.env.math": 0.9734299516908212, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8250728862973761, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622047046067568, "success_rate.epoch.global": 0.8982782057091074, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946120689655172, "tokens_p.mean_in_band": 0.470703125, "tokens_rate.above_band": 0.9157894736842105, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08421052631578947 }, { "epoch": 1.472092032381764, "grad_norm": 52.237889166183265, "learning_rate": 3.77591433414213e-07, "loss": 0.3713, "step": 6910, "success_rate.epoch.env.abd": 0.9849246231155779, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9197622585438335, "success_rate.epoch.env.math": 0.9734726688102894, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.8244186046511628, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8619078839060588, "success_rate.epoch.global": 0.8978300180831826, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9959514170040485, "tokens_p.mean_in_band": 0.5463005514705882, "tokens_rate.above_band": 0.8790035587188612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12099644128113879 }, { "epoch": 1.4731572219855134, "grad_norm": 95.04107628637088, "learning_rate": 3.77560758825365e-07, "loss": 0.3626, "step": 6915, "success_rate.epoch.env.abd": 0.985, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9198813056379822, "success_rate.epoch.env.math": 0.9734939759036144, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.8244495944380069, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8619504453035116, "success_rate.epoch.global": 0.8978349120433018, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976108562691132, "tokens_p.mean_in_band": 0.7584635416666666, "tokens_rate.above_band": 0.9819819819819819, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018018018018018018 }, { "epoch": 1.4742224115892628, "grad_norm": 27.531466838226915, "learning_rate": 3.775300714383621e-07, "loss": 0.1848, "step": 6920, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.856, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9186390532544378, "success_rate.epoch.env.math": 0.9735788630904724, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.8240740740740741, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8611903103488389, "success_rate.epoch.global": 0.8973897389738974, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.994968220338983, "tokens_p.mean_in_band": 0.6028645833333334, "tokens_rate.above_band": 0.9291338582677166, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07086614173228346 }, { "epoch": 1.4752876011930123, "grad_norm": 231.5171896718186, "learning_rate": 3.774993712755692e-07, "loss": 0.5015, "step": 6925, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9175257731958762, "success_rate.epoch.env.math": 0.9736, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.8240046162723601, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604671056448027, "success_rate.epoch.global": 0.8969465648854962, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0006236141906875, "tokens_p.mean_in_band": 0.443115234375, "tokens_rate.above_band": 0.9575371549893843, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04246284501061571 }, { "epoch": 1.4763527907967617, "grad_norm": 182.226877016691, "learning_rate": 3.774686583593602e-07, "loss": 0.2062, "step": 6930, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9175257731958762, "success_rate.epoch.env.math": 0.9736842105263158, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8239355581127733, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602279828921872, "success_rate.epoch.global": 0.8967293906810035, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954379562043796, "tokens_p.mean_in_band": 0.7049005681818182, "tokens_rate.above_band": 0.8616352201257862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13836477987421383 }, { "epoch": 1.4774179804005114, "grad_norm": 152.21272684933314, "learning_rate": 3.7743793271211853e-07, "loss": 0.2624, "step": 6935, "success_rate.epoch.env.abd": 0.9851485148514851, "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9177679882525698, "success_rate.epoch.env.math": 0.9737470167064439, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.824036802760207, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603995183736429, "success_rate.epoch.global": 0.8969602145730889, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0014982876712328, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4784831700042607, "grad_norm": 26.477416517540636, "learning_rate": 3.7740719435623683e-07, "loss": 0.4108, "step": 6940, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9178885630498533, "success_rate.epoch.env.math": 0.9738095238095238, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8239678899082569, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8604165482894406, "success_rate.epoch.global": 0.8969669937555754, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992739898989899, "tokens_p.mean_in_band": 0.37890625, "tokens_rate.above_band": 0.8918918918918919, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10810810810810811 }, { "epoch": 1.4795483596080103, "grad_norm": 36.71226746501737, "learning_rate": 3.77376443314117e-07, "loss": 0.3865, "step": 6945, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.91800878477306, "success_rate.epoch.env.math": 0.9738924050632911, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8232265446224256, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8603873757209666, "success_rate.epoch.global": 0.8967512238540276, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977571770334929, "tokens_p.mean_in_band": 0.5830965909090909, "tokens_rate.above_band": 0.95, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05 }, { "epoch": 1.4806135492117596, "grad_norm": 41.19193732455248, "learning_rate": 3.7734567960817005e-07, "loss": 0.1933, "step": 6950, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9493087557603687, "success_rate.epoch.env.logic": 0.9183673469387755, "success_rate.epoch.env.math": 0.9739336492890995, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8234285714285714, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8601683771825678, "success_rate.epoch.global": 0.8967584369449378, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994188596491228, "tokens_p.mean_in_band": 0.599476439790576, "tokens_rate.above_band": 0.8818069306930693, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11819306930693069 }, { "epoch": 1.4816787388155093, "grad_norm": 67.65891632659265, "learning_rate": 3.7731490326081656e-07, "loss": 0.3133, "step": 6955, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9493087557603687, "success_rate.epoch.env.logic": 0.9171511627906976, "success_rate.epoch.env.math": 0.9739542225730071, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8238312428734321, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8601467781326686, "success_rate.epoch.global": 0.8967656180770935, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980279126213593, "tokens_p.mean_in_band": 0.701891447368421, "tokens_rate.above_band": 0.9559164733178654, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04408352668213457 }, { "epoch": 1.4827439284192585, "grad_norm": 41.2008785320527, "learning_rate": 3.7728411429448593e-07, "loss": 0.268, "step": 6960, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9495412844036697, "success_rate.epoch.env.logic": 0.9175108538350217, "success_rate.epoch.env.math": 0.9739952718676123, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8234624145785877, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602518423403533, "success_rate.epoch.global": 0.8967499447269511, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9966755319148937, "tokens_p.mean_in_band": 0.696875, "tokens_rate.above_band": 0.9868766404199475, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013123359580052493 }, { "epoch": 1.4838091180230082, "grad_norm": 85.90622121485156, "learning_rate": 3.7725331273161705e-07, "loss": 0.2284, "step": 6965, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9495412844036697, "success_rate.epoch.env.logic": 0.9177489177489178, "success_rate.epoch.env.math": 0.9732914375490966, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8231949971574758, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.860209923825714, "success_rate.epoch.global": 0.8965365100375028, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952116935483871, "tokens_p.mean_in_band": 0.57734375, "tokens_rate.above_band": 0.9253731343283582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07462686567164178 }, { "epoch": 1.4848743076267574, "grad_norm": 62.21930551436523, "learning_rate": 3.7722249859465783e-07, "loss": 0.3091, "step": 6970, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9497716894977168, "success_rate.epoch.env.logic": 0.9178674351585014, "success_rate.epoch.env.math": 0.9733750978856696, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8229284903518729, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8602316073534244, "success_rate.epoch.global": 0.8965441338322694, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963357300884956, "tokens_p.mean_in_band": 0.6690848214285714, "tokens_rate.above_band": 0.9847494553376906, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015250544662309368 }, { "epoch": 1.4859394972305071, "grad_norm": 112.12691753323395, "learning_rate": 3.771916719060654e-07, "loss": 0.3132, "step": 6975, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9497716894977168, "success_rate.epoch.env.logic": 0.9169054441260746, "success_rate.epoch.env.math": 0.9734167318217357, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.822562358276644, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630320574691871, "success_rate.epoch.global": 0.8963320887327038, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975353422619048, "tokens_p.mean_in_band": 0.43689903846153844, "tokens_rate.above_band": 0.9451476793248945, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05485232067510549 }, { "epoch": 1.4870046868342566, "grad_norm": 133.8394276351542, "learning_rate": 3.77160832688306e-07, "loss": 0.2747, "step": 6980, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9502262443438914, "success_rate.epoch.env.logic": 0.9172610556348074, "success_rate.epoch.env.math": 0.9734789391575663, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8221970554926388, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630781548243445, "success_rate.epoch.global": 0.8963401271093578, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997477578475337, "tokens_p.mean_in_band": 0.51953125, "tokens_rate.above_band": 0.9964253798033958, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0035746201966041107 }, { "epoch": 1.488069876438006, "grad_norm": 58.66485414134898, "learning_rate": 3.771299809638551e-07, "loss": 0.1686, "step": 6985, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9504504504504504, "success_rate.epoch.env.logic": 0.9176136363636364, "success_rate.epoch.env.math": 0.9735408560311284, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8219332956472584, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631122406301235, "success_rate.epoch.global": 0.896348130330199, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969117647058824, "tokens_p.mean_in_band": 0.706640625, "tokens_rate.above_band": 0.9883720930232558, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011627906976744186 }, { "epoch": 1.4891350660417555, "grad_norm": 26.88216210048663, "learning_rate": 3.770991167551972e-07, "loss": 0.2377, "step": 6990, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9178470254957507, "success_rate.epoch.env.math": 0.9736024844720497, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8221343873517787, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632174015335977, "success_rate.epoch.global": 0.8965742963124591, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0007468929254302, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9995222169135213, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00047778308647873863 }, { "epoch": 1.490200255645505, "grad_norm": 1026.8160156422239, "learning_rate": 3.770682400848258e-07, "loss": 0.4027, "step": 6995, "success_rate.epoch.env.abd": 0.9854368932038835, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9178470254957507, "success_rate.epoch.env.math": 0.9736434108527132, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8227349465391108, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633065816689779, "success_rate.epoch.global": 0.8967994774657086, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9926353503184714, "tokens_p.mean_in_band": 0.59375, "tokens_rate.above_band": 0.9936708860759493, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006329113924050633 }, { "epoch": 1.4912654452492544, "grad_norm": 319.6950635031251, "learning_rate": 3.7703735097524373e-07, "loss": 0.4081, "step": 7000, "success_rate.epoch.env.abd": 0.9854368932038835, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9180790960451978, "success_rate.epoch.env.math": 0.9736638264910922, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8210880538418396, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632844245585349, "success_rate.epoch.global": 0.896154681729307, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9978197674418605, "tokens_p.mean_in_band": 0.50927734375, "tokens_rate.above_band": 0.9416058394160584, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058394160583941604 }, { "epoch": 1.4923306348530039, "grad_norm": 69.18474094111437, "learning_rate": 3.770064494489627e-07, "loss": 0.2266, "step": 7005, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.918194640338505, "success_rate.epoch.env.math": 0.9737451737451738, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8213885778275476, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8634124340894875, "success_rate.epoch.global": 0.8963797962280512, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9950770547945206, "tokens_p.mean_in_band": 0.845703125, "tokens_rate.above_band": 0.9733333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02666666666666667 }, { "epoch": 1.4933958244567533, "grad_norm": 202.44290441336236, "learning_rate": 3.769755355285035e-07, "loss": 0.338, "step": 7010, "success_rate.epoch.env.abd": 0.9855769230769231, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9186535764375876, "success_rate.epoch.env.math": 0.9737654320987654, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8212290502793296, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8634478289626301, "success_rate.epoch.global": 0.8963876270819814, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933510638297872, "tokens_p.mean_in_band": 0.40792410714285715, "tokens_rate.above_band": 0.9306930693069307, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06930693069306931 }, { "epoch": 1.4944610140605028, "grad_norm": 58.94943868995095, "learning_rate": 3.76944609236396e-07, "loss": 0.2973, "step": 7015, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9187675070028011, "success_rate.epoch.env.math": 0.9738058551617874, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8216276477146043, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863534666632005, "success_rate.epoch.global": 0.8966112669976257, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994169776119403, "tokens_p.mean_in_band": 0.818359375, "tokens_rate.above_band": 0.9962825278810409, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0037174721189591076 }, { "epoch": 1.4955262036642523, "grad_norm": 188.42181743688602, "learning_rate": 3.7691367059517906e-07, "loss": 0.2677, "step": 7020, "success_rate.epoch.env.abd": 0.9858490566037735, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9513274336283186, "success_rate.epoch.env.logic": 0.9187675070028011, "success_rate.epoch.env.math": 0.9738461538461538, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8219254312743461, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636932340476121, "success_rate.epoch.global": 0.896833943570967, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997184684684685, "tokens_p.mean_in_band": 0.6103515625, "tokens_rate.above_band": 0.9955156950672646, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004484304932735426 }, { "epoch": 1.4965913932680017, "grad_norm": 100.91654830386487, "learning_rate": 3.7688271962740057e-07, "loss": 0.2314, "step": 7025, "success_rate.epoch.env.abd": 0.9858490566037735, "success_rate.epoch.env.agentgym:alfworld": 0.8461538461538461, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9192200557103064, "success_rate.epoch.env.math": 0.9738662567255957, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8221234018899388, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631773891979537, "success_rate.epoch.global": 0.8968407479045777, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984802431610942, "tokens_p.mean_in_band": 0.7484375, "tokens_rate.above_band": 0.9949596774193549, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005040322580645161 }, { "epoch": 1.4976565828717512, "grad_norm": 103.72381943962169, "learning_rate": 3.768517563556173e-07, "loss": 0.2753, "step": 7030, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8461538461538461, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9192200557103064, "success_rate.epoch.env.math": 0.973159509202454, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8227146814404432, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863172931778075, "success_rate.epoch.global": 0.896847523053828, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9947139303482587, "tokens_p.mean_in_band": 0.6895833333333333, "tokens_rate.above_band": 0.9305555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06944444444444445 }, { "epoch": 1.4987217724755006, "grad_norm": 201.2032751309317, "learning_rate": 3.768207808023951e-07, "loss": 0.2265, "step": 7035, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9196675900277008, "success_rate.epoch.env.math": 0.9731800766283525, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8230088495575221, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633549757266935, "success_rate.epoch.global": 0.897068264498181, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998114224137931, "tokens_p.mean_in_band": 0.8177083333333334, "tokens_rate.above_band": 0.9747899159663865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025210084033613446 }, { "epoch": 1.49978696207925, "grad_norm": 36.486216099178016, "learning_rate": 3.767897929903088e-07, "loss": 0.1893, "step": 7040, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9186206896551724, "success_rate.epoch.env.math": 0.9732415902140673, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8233995584988962, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863300914104381, "success_rate.epoch.global": 0.8970745248772155, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987582781456954, "tokens_p.mean_in_band": 0.6867897727272727, "tokens_rate.above_band": 0.9320987654320988, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06790123456790123 }, { "epoch": 1.5008521516829996, "grad_norm": 71.7137765131587, "learning_rate": 3.7675879294194194e-07, "loss": 0.3056, "step": 7045, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9188445667125172, "success_rate.epoch.env.math": 0.973302822273074, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8238855255916345, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633710119415707, "success_rate.epoch.global": 0.8972938418921799, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9933139534883721, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9817351598173516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0182648401826484 }, { "epoch": 1.501917341286749, "grad_norm": 162.23594326269188, "learning_rate": 3.7672778067988725e-07, "loss": 0.2246, "step": 7050, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9191780821917809, "success_rate.epoch.env.math": 0.9725609756097561, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8238199780461032, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633513864148827, "success_rate.epoch.global": 0.897086965766532, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9936131386861314, "tokens_p.mean_in_band": 0.5534446022727273, "tokens_rate.above_band": 0.9256756756756757, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07432432432432433 }, { "epoch": 1.5029825308904985, "grad_norm": 74.87500521829533, "learning_rate": 3.766967562267462e-07, "loss": 0.2424, "step": 7055, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9193989071038251, "success_rate.epoch.env.math": 0.9726235741444867, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.823658269441402, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633856011562959, "success_rate.epoch.global": 0.8970931466157437, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962797619047619, "tokens_p.mean_in_band": 0.70859375, "tokens_rate.above_band": 0.9618320610687023, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03816793893129771 }, { "epoch": 1.504047720494248, "grad_norm": 125.536291463342, "learning_rate": 3.7666571960512916e-07, "loss": 0.198, "step": 7060, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8484848484848485, "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9182561307901907, "success_rate.epoch.env.math": 0.9726858877086495, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8238512035010941, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661497370229512, "success_rate.epoch.global": 0.897099301291552, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975715746421268, "tokens_p.mean_in_band": 0.7847222222222222, "tokens_rate.above_band": 0.9819277108433735, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018072289156626505 }, { "epoch": 1.5051129100979974, "grad_norm": 81.33511731632726, "learning_rate": 3.766346708376555e-07, "loss": 0.2534, "step": 7065, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9182561307901907, "success_rate.epoch.env.math": 0.972809667673716, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8240437158469945, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8663046136533429, "success_rate.epoch.global": 0.8973167124445384, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986033519553073, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5061780997017469, "grad_norm": 67.92568931057995, "learning_rate": 3.766036099469533e-07, "loss": 0.373, "step": 7070, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9519650655021834, "success_rate.epoch.env.logic": 0.9182561307901907, "success_rate.epoch.env.math": 0.9729323308270676, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.824235807860262, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8663717018759375, "success_rate.epoch.global": 0.8975332068311196, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0002806886227544, "tokens_p.mean_in_band": 0.78515625, "tokens_rate.above_band": 0.99800796812749, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00199203187250996 }, { "epoch": 1.5072432893054963, "grad_norm": 74.2854230790745, "learning_rate": 3.765725369556594e-07, "loss": 0.1376, "step": 7075, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9521739130434783, "success_rate.epoch.env.logic": 0.9184782608695652, "success_rate.epoch.env.math": 0.972972972972973, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8247142079477409, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.866458067317215, "success_rate.epoch.global": 0.8977487902377446, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987529342723005, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9976580796252927, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00234192037470726 }, { "epoch": 1.5083084789092458, "grad_norm": 177.46371989335142, "learning_rate": 3.7654145188641964e-07, "loss": 0.2073, "step": 7080, "success_rate.epoch.env.abd": 0.986046511627907, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9185888738127544, "success_rate.epoch.env.math": 0.9730134932533733, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8245518739815317, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8665040691138995, "success_rate.epoch.global": 0.897753516691161, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987012987012988, "tokens_p.mean_in_band": 0.537109375, "tokens_rate.above_band": 0.9897172236503856, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010282776349614395 }, { "epoch": 1.5093736685129953, "grad_norm": 92.38783690469769, "learning_rate": 3.765103547618887e-07, "loss": 0.25, "step": 7085, "success_rate.epoch.env.abd": 0.986046511627907, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9185888738127544, "success_rate.epoch.env.math": 0.9730740463724757, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8240389821331889, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8664849362262643, "success_rate.epoch.global": 0.8975487115021998, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964622641509434, "tokens_p.mean_in_band": 0.4986979166666667, "tokens_rate.above_band": 0.8983050847457628, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1016949152542373 }, { "epoch": 1.510438858116745, "grad_norm": 35.49707992488118, "learning_rate": 3.764792456047298e-07, "loss": 0.1749, "step": 7090, "success_rate.epoch.env.abd": 0.986046511627907, "success_rate.epoch.env.agentgym:alfworld": 0.8507462686567164, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.9185888738127544, "success_rate.epoch.env.math": 0.9731743666169895, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8237837837837838, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8665915348550868, "success_rate.epoch.global": 0.897553836504286, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000164907651715, "tokens_p.mean_in_band": 0.4184027777777778, "tokens_rate.above_band": 0.9921465968586387, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007853403141361256 }, { "epoch": 1.5115040477204942, "grad_norm": 198.1590266783774, "learning_rate": 3.7644812443761516e-07, "loss": 0.393, "step": 7095, "success_rate.epoch.env.abd": 0.9861751152073732, "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.9186991869918699, "success_rate.epoch.env.math": 0.9732540861812778, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8228941684665226, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8666401352369726, "success_rate.epoch.global": 0.8973503025245149, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986945169712794, "tokens_p.mean_in_band": 0.6633522727272727, "tokens_rate.above_band": 0.9720812182741116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027918781725888325 }, { "epoch": 1.5125692373242439, "grad_norm": 103.3237623062668, "learning_rate": 3.764169912832256e-07, "loss": 0.2709, "step": 7100, "success_rate.epoch.env.abd": 0.9861751152073732, "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.918809201623816, "success_rate.epoch.env.math": 0.9733135656041513, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.823371028540657, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667206069293957, "success_rate.epoch.global": 0.8975640224859462, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971098265895953, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.513634426927993, "grad_norm": 313.5706435826607, "learning_rate": 3.763858461642508e-07, "loss": 0.351, "step": 7105, "success_rate.epoch.env.abd": 0.9862385321100917, "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9527896995708155, "success_rate.epoch.env.logic": 0.918809201623816, "success_rate.epoch.env.math": 0.9733727810650887, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8238453276047261, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8667933727568492, "success_rate.epoch.global": 0.8977768543527945, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956801470588236, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9883720930232558, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011627906976744186 }, { "epoch": 1.5146996165317428, "grad_norm": 252.9462269290008, "learning_rate": 3.763546891033891e-07, "loss": 0.353, "step": 7110, "success_rate.epoch.env.abd": 0.9862385321100917, "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9529914529914529, "success_rate.epoch.env.logic": 0.918918918918919, "success_rate.epoch.env.math": 0.9734317343173432, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8242229367631297, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8669604052791342, "success_rate.epoch.global": 0.897988803649181, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983689205219455, "tokens_p.mean_in_band": 0.8424479166666666, "tokens_rate.above_band": 0.9929328621908127, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007067137809187279 }, { "epoch": 1.515764806135492, "grad_norm": 98.49626600731914, "learning_rate": 3.7632352012334746e-07, "loss": 0.3528, "step": 7115, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.918918918918919, "success_rate.epoch.env.math": 0.9734317343173432, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8242521367521367, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86661131930367, "success_rate.epoch.global": 0.8977860542106352, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957843777197564, "tokens_p.mean_in_band": 0.672945205479452, "tokens_rate.above_band": 0.9402618657937807, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05973813420621931 }, { "epoch": 1.5168299957392417, "grad_norm": 372.763557444237, "learning_rate": 3.7629233924684166e-07, "loss": 0.552, "step": 7120, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9190283400809717, "success_rate.epoch.env.math": 0.9734904270986745, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8242811501597445, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8666292399719417, "success_rate.epoch.global": 0.8977906256452612, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936733128834356, "tokens_p.mean_in_band": 0.7236328125, "tokens_rate.above_band": 0.9314285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06857142857142857 }, { "epoch": 1.517895185342991, "grad_norm": 27.659399049715127, "learning_rate": 3.762611464965961e-07, "loss": 0.1832, "step": 7125, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9191374663072777, "success_rate.epoch.env.math": 0.973568281938326, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8241232731137088, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8666530602696308, "success_rate.epoch.global": 0.8977951782402638, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978780864197531, "tokens_p.mean_in_band": 0.4583333333333333, "tokens_rate.above_band": 0.9642857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03571428571428571 }, { "epoch": 1.5189603749467406, "grad_norm": 112.36836328028735, "learning_rate": 3.762299418953438e-07, "loss": 0.4154, "step": 7130, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.8540145985401459, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9192462987886945, "success_rate.epoch.env.math": 0.9736263736263736, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.823966065747614, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.866759074894518, "success_rate.epoch.global": 0.8977997121118652, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984975961538461, "tokens_p.mean_in_band": 0.3404017857142857, "tokens_rate.above_band": 0.9674418604651163, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03255813953488372 }, { "epoch": 1.5200255645504899, "grad_norm": 195.96966493282724, "learning_rate": 3.7619872546582654e-07, "loss": 0.1902, "step": 7135, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.8561151079136691, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9194630872483222, "success_rate.epoch.env.math": 0.9736263736263736, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8243386243386244, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8670289653014713, "success_rate.epoch.global": 0.8980094397701621, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994887107329843, "tokens_p.mean_in_band": 0.69921875, "tokens_rate.above_band": 0.9986928104575163, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00130718954248366 }, { "epoch": 1.5210907541542396, "grad_norm": 119.41359684404878, "learning_rate": 3.7616749723079455e-07, "loss": 0.2443, "step": 7140, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9194630872483222, "success_rate.epoch.env.math": 0.9737609329446064, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8245243128964059, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671515104928534, "success_rate.epoch.global": 0.8982183084169568, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977678571428571, "tokens_p.mean_in_band": 0.77734375, "tokens_rate.above_band": 0.9966101694915255, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003389830508474576 }, { "epoch": 1.5221559437579888, "grad_norm": 110.2857509718093, "learning_rate": 3.761362572130067e-07, "loss": 0.3237, "step": 7145, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9198931909212283, "success_rate.epoch.env.math": 0.9737991266375546, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8243670886075949, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671797898634938, "success_rate.epoch.global": 0.8982219497240956, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942129629629629, "tokens_p.mean_in_band": 0.42041015625, "tokens_rate.above_band": 0.9759036144578314, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024096385542168676 }, { "epoch": 1.5232211333617385, "grad_norm": 134.41527561927634, "learning_rate": 3.761050054352306e-07, "loss": 0.3545, "step": 7150, "success_rate.epoch.env.abd": 0.9819819819819819, "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9537815126050421, "success_rate.epoch.env.logic": 0.92, "success_rate.epoch.env.math": 0.9738562091503268, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8247368421052632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8672534434359714, "success_rate.epoch.global": 0.898429532939017, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993562734082397, "tokens_p.mean_in_band": 0.82421875, "tokens_rate.above_band": 0.9907235621521335, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00927643784786642 }, { "epoch": 1.5242863229654877, "grad_norm": 36.09520468714888, "learning_rate": 3.7607374192024224e-07, "loss": 0.2294, "step": 7155, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9201065246338216, "success_rate.epoch.env.math": 0.9739130434782609, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8250131371518655, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8674104440099548, "success_rate.epoch.global": 0.8986362711174435, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969812164579607, "tokens_p.mean_in_band": 0.76171875, "tokens_rate.above_band": 0.9893805309734514, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010619469026548672 }, { "epoch": 1.5253515125692374, "grad_norm": 58.54681697123234, "learning_rate": 3.7604246669082633e-07, "loss": 0.2206, "step": 7160, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9201065246338216, "success_rate.epoch.env.math": 0.9725631768953069, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8253801782905087, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673420093066817, "success_rate.epoch.global": 0.8984359130611416, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.6, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995045731707317, "tokens_p.mean_in_band": 0.5724609375, "tokens_rate.above_band": 0.9425287356321839, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05747126436781609 }, { "epoch": 1.5264167021729866, "grad_norm": 96.52544308919609, "learning_rate": 3.76011179769776e-07, "loss": 0.3362, "step": 7165, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9201065246338216, "success_rate.epoch.env.math": 0.9726618705035971, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8246991104133962, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673097226537084, "success_rate.epoch.global": 0.8982363673221163, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947033898305084, "tokens_p.mean_in_band": 0.677734375, "tokens_rate.above_band": 0.946524064171123, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053475935828877004 }, { "epoch": 1.5274818917767363, "grad_norm": 201.72454681434493, "learning_rate": 3.7597988117989286e-07, "loss": 0.29, "step": 7170, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9204244031830239, "success_rate.epoch.env.math": 0.9727011494252874, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.825065274151436, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673827588430476, "success_rate.epoch.global": 0.8984422415537123, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977189781021898, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5285470813804856, "grad_norm": 121.52833879106123, "learning_rate": 3.759485709439871e-07, "loss": 0.3213, "step": 7175, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9207397622192867, "success_rate.epoch.env.math": 0.9727793696275072, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8253388946819604, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8674434133675029, "success_rate.epoch.global": 0.8986472844740562, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.989480198019802, "tokens_p.mean_in_band": 0.818359375, "tokens_rate.above_band": 0.9805825242718447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019417475728155338 }, { "epoch": 1.5296122709842352, "grad_norm": 154.19701165781498, "learning_rate": 3.7591724908487754e-07, "loss": 0.1985, "step": 7180, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8581560283687943, "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.920844327176781, "success_rate.epoch.env.math": 0.9728377412437456, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8257930317212688, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867519915522985, "success_rate.epoch.global": 0.8988515011082007, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969706632653061, "tokens_p.mean_in_band": 0.6653645833333334, "tokens_rate.above_band": 0.9849246231155779, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01507537688442211 }, { "epoch": 1.5306774605879847, "grad_norm": 314.0843373614995, "learning_rate": 3.758859156253912e-07, "loss": 0.3544, "step": 7185, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8591549295774648, "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9541666666666667, "success_rate.epoch.env.logic": 0.9209486166007905, "success_rate.epoch.env.math": 0.9729344729344729, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8254545454545454, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676156616435177, "success_rate.epoch.global": 0.8988538105771164, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996811224489796, "tokens_p.mean_in_band": 0.24140625, "tokens_rate.above_band": 0.9936628643852978, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0063371356147021544 }, { "epoch": 1.5317426501917342, "grad_norm": 66.80951682709765, "learning_rate": 3.758545705883637e-07, "loss": 0.287, "step": 7190, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8601398601398601, "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9541666666666667, "success_rate.epoch.env.logic": 0.9210526315789473, "success_rate.epoch.env.math": 0.9730113636363636, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8257261410788381, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677463372677662, "success_rate.epoch.global": 0.8990567930965282, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996170343137255, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9951219512195122, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004878048780487805 }, { "epoch": 1.5328078397954836, "grad_norm": 71.9227432628488, "learning_rate": 3.7582321399663913e-07, "loss": 0.7006, "step": 7195, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8601398601398601, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9198423127463863, "success_rate.epoch.env.math": 0.9730113636363636, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8254790264111859, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8672817370868615, "success_rate.epoch.global": 0.8986581213699179, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.611111111111111, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.997301633605601, "tokens_p.mean_in_band": 0.5135135135135135, "tokens_rate.above_band": 0.9586129753914989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04138702460850112 }, { "epoch": 1.533873029399233, "grad_norm": 115.53448741846098, "learning_rate": 3.7579184587306987e-07, "loss": 0.449, "step": 7200, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8601398601398601, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9199475065616798, "success_rate.epoch.env.math": 0.9730496453900709, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.825503355704698, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673169064425795, "success_rate.epoch.global": 0.8986608035178892, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9947311046511628, "tokens_p.mean_in_band": 0.6859375, "tokens_rate.above_band": 0.9717514124293786, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02824858757062147 }, { "epoch": 1.5349382190029826, "grad_norm": 122.81173187534647, "learning_rate": 3.757604662405168e-07, "loss": 0.1973, "step": 7205, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8601398601398601, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9199475065616798, "success_rate.epoch.env.math": 0.9731638418079096, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8258629572385369, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8673599789836411, "success_rate.epoch.global": 0.8988629563135847, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949882075471698, "tokens_p.mean_in_band": 0.7604166666666666, "tokens_rate.above_band": 0.9724770642201835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027522935779816515 }, { "epoch": 1.536003408606732, "grad_norm": 28.45209197541216, "learning_rate": 3.7572907512184926e-07, "loss": 0.2688, "step": 7210, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9615384615384616, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9187418086500655, "success_rate.epoch.env.math": 0.9731638418079096, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8263995891114535, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8675469863537949, "success_rate.epoch.global": 0.8988652199880549, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974324324324324, "tokens_p.mean_in_band": 0.5129310344827587, "tokens_rate.above_band": 0.9696016771488469, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03039832285115304 }, { "epoch": 1.5370685982104815, "grad_norm": 116.67579592542391, "learning_rate": 3.756976725399448e-07, "loss": 0.1264, "step": 7215, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.918954248366013, "success_rate.epoch.env.math": 0.9731827805222301, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.826844262295082, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677379457209221, "success_rate.epoch.global": 0.8990661633220743, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972578642384106, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.9983471074380166, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001652892561983471 }, { "epoch": 1.538133787814231, "grad_norm": 33.37005056284808, "learning_rate": 3.756662585176893e-07, "loss": 0.1664, "step": 7220, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9178617992177314, "success_rate.epoch.env.math": 0.9732205778717407, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8271983640081799, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676914055378995, "success_rate.epoch.global": 0.8990680150703946, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0005782169890665, "tokens_p.mean_in_band": 0.6734375, "tokens_rate.above_band": 0.9958123953098827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0041876046901172526 }, { "epoch": 1.5391989774179804, "grad_norm": 72.70054637160406, "learning_rate": 3.756348330779772e-07, "loss": 0.2936, "step": 7225, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9167750325097529, "success_rate.epoch.env.math": 0.9732958538299368, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8275510204081633, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676315115060997, "success_rate.epoch.global": 0.8990698594894122, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998828125, "tokens_p.mean_in_band": 0.49333639705882354, "tokens_rate.above_band": 0.974124809741248, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0258751902587519 }, { "epoch": 1.5402641670217299, "grad_norm": 90.24994920992617, "learning_rate": 3.756033962437112e-07, "loss": 0.2605, "step": 7230, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8620689655172413, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9547325102880658, "success_rate.epoch.env.logic": 0.9167750325097529, "success_rate.epoch.env.math": 0.973314606741573, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8276563294356889, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867746872605, "success_rate.epoch.global": 0.8990716966225558, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985984219269103, "tokens_p.mean_in_band": 0.35409007352941174, "tokens_rate.above_band": 0.9860769860769861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013923013923013924 }, { "epoch": 1.5413293566254793, "grad_norm": 226.23333593108475, "learning_rate": 3.7557194803780207e-07, "loss": 0.7041, "step": 7235, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.863013698630137, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9549180327868853, "success_rate.epoch.env.logic": 0.9168831168831169, "success_rate.epoch.env.math": 0.9726507713884993, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.827079107505071, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677466255778534, "success_rate.epoch.global": 0.8986792824758526, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.82, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981871546961326, "tokens_p.mean_in_band": 0.48829868861607145, "tokens_rate.above_band": 0.981029810298103, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018970189701897018 }, { "epoch": 1.5423945462292288, "grad_norm": 59.49632208363608, "learning_rate": 3.7554048848316915e-07, "loss": 0.2515, "step": 7240, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.863013698630137, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9549180327868853, "success_rate.epoch.env.logic": 0.917098445595855, "success_rate.epoch.env.math": 0.972027972027972, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8268354430379747, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677068731127082, "success_rate.epoch.global": 0.898485146566988, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955439814814815, "tokens_p.mean_in_band": 0.6612723214285714, "tokens_rate.above_band": 0.9747292418772563, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02527075812274368 }, { "epoch": 1.5434597358329782, "grad_norm": 211.25196886932898, "learning_rate": 3.755090176027399e-07, "loss": 0.2325, "step": 7245, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.863013698630137, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9551020408163265, "success_rate.epoch.env.logic": 0.917312661498708, "success_rate.epoch.env.math": 0.9720865317515701, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8271854471955533, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677802174593875, "success_rate.epoch.global": 0.8986844688788533, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990053050397878, "tokens_p.mean_in_band": 0.396484375, "tokens_rate.above_band": 0.9973544973544973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0026455026455026454 }, { "epoch": 1.5445249254367277, "grad_norm": 88.17144734564283, "learning_rate": 3.7547753541945e-07, "loss": 0.3595, "step": 7250, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8639455782312925, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9551020408163265, "success_rate.epoch.env.logic": 0.9175257731958762, "success_rate.epoch.env.math": 0.9721642310368824, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8269424823410696, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8678692834347648, "success_rate.epoch.global": 0.898687046835195, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975892857142857, "tokens_p.mean_in_band": 0.5809151785714286, "tokens_rate.above_band": 0.9803921568627451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0196078431372549 }, { "epoch": 1.5455901150404772, "grad_norm": 201.8897655836413, "learning_rate": 3.7544604195624363e-07, "loss": 0.3318, "step": 7255, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8639455782312925, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9551020408163265, "success_rate.epoch.env.logic": 0.9163449163449163, "success_rate.epoch.env.math": 0.9722607489597781, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8267875125881168, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.867756619009218, "success_rate.epoch.global": 0.8984940348132212, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9935515873015873, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.945, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055 }, { "epoch": 1.5466553046442266, "grad_norm": 120.5917772841325, "learning_rate": 3.7541453723607284e-07, "loss": 0.4153, "step": 7260, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.8639455782312925, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.951417004048583, "success_rate.epoch.env.logic": 0.916452442159383, "success_rate.epoch.env.math": 0.9722991689750693, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8270487682252388, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8674726245696988, "success_rate.epoch.global": 0.8984969744290455, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9866183315431436, "tokens_p.mean_in_band": 0.4970485336752899, "tokens_rate.above_band": 0.7135922330097088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.28640776699029125 }, { "epoch": 1.547720494247976, "grad_norm": 270.2429326059362, "learning_rate": 3.75383021281898e-07, "loss": 0.332, "step": 7265, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.8657718120805369, "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.916452442159383, "success_rate.epoch.env.math": 0.9723374827109267, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8268072289156626, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8676748573221541, "success_rate.epoch.global": 0.8984999025910774, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936380293159609, "tokens_p.mean_in_band": 0.7663810483870968, "tokens_rate.above_band": 0.9753772835583797, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024622716441620333 }, { "epoch": 1.5487856838517255, "grad_norm": 133.22513118022485, "learning_rate": 3.7535149411668784e-07, "loss": 0.3863, "step": 7270, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.8675496688741722, "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9165596919127086, "success_rate.epoch.env.math": 0.9716850828729282, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8272408612919379, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8678263426935394, "success_rate.epoch.global": 0.8985028193661287, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998202264381885, "tokens_p.mean_in_band": 0.0018157958984375, "tokens_rate.above_band": 0.9987775061124694, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012224938875305623 }, { "epoch": 1.5498508734554752, "grad_norm": 83.8355173301768, "learning_rate": 3.7531995576341915e-07, "loss": 0.3274, "step": 7275, "success_rate.epoch.env.abd": 0.9826086956521739, "success_rate.epoch.env.agentgym:alfworld": 0.8675496688741722, "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9156010230179028, "success_rate.epoch.env.math": 0.9717241379310345, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8270864567716142, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8677356087120347, "success_rate.epoch.global": 0.8983116631088687, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977409638554217, "tokens_p.mean_in_band": 0.4609375, "tokens_rate.above_band": 0.9707602339181286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029239766081871343 }, { "epoch": 1.5509160630592245, "grad_norm": 179.02460433090934, "learning_rate": 3.7528840624507676e-07, "loss": 0.2639, "step": 7280, "success_rate.epoch.env.abd": 0.9826086956521739, "success_rate.epoch.env.agentgym:alfworld": 0.869281045751634, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9157088122605364, "success_rate.epoch.env.math": 0.9717436250861475, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.827517447657029, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8679627437205297, "success_rate.epoch.global": 0.8985086190199496, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997549926035503, "tokens_p.mean_in_band": 0.7565104166666666, "tokens_rate.above_band": 0.9955817378497791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004418262150220913 }, { "epoch": 1.5519812526629742, "grad_norm": 87.68828579706313, "learning_rate": 3.7525684558465367e-07, "loss": 0.3525, "step": 7285, "success_rate.epoch.env.abd": 0.9826839826839827, "success_rate.epoch.env.agentgym:alfworld": 0.869281045751634, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9158163265306123, "success_rate.epoch.env.math": 0.9718406593406593, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.827775012444002, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8680115982971995, "success_rate.epoch.global": 0.8987048134544752, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9943985849056604, "tokens_p.mean_in_band": 0.8541666666666666, "tokens_rate.above_band": 0.9724770642201835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027522935779816515 }, { "epoch": 1.5530464422667234, "grad_norm": 148.35392171602203, "learning_rate": 3.7522527380515126e-07, "loss": 0.4016, "step": 7290, "success_rate.epoch.env.abd": 0.9826839826839827, "success_rate.epoch.env.agentgym:alfworld": 0.869281045751634, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9159235668789809, "success_rate.epoch.env.math": 0.9719178082191781, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8276204669647292, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8680330748514204, "success_rate.epoch.global": 0.8987073123673548, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973958333333334, "tokens_p.mean_in_band": 0.5725446428571429, "tokens_rate.above_band": 0.9882352941176471, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011764705882352941 }, { "epoch": 1.554111631870473, "grad_norm": 237.85102098860557, "learning_rate": 3.751936909295787e-07, "loss": 0.3177, "step": 7295, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.869281045751634, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.916243654822335, "success_rate.epoch.env.math": 0.9719753930280246, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8273809523809523, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8680524199581563, "success_rate.epoch.global": 0.8987098016560755, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933823529411765, "tokens_p.mean_in_band": 0.503125, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 1.5551768214742223, "grad_norm": 78.36883597045158, "learning_rate": 3.7516209698095337e-07, "loss": 0.3323, "step": 7300, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8701298701298701, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9164556962025316, "success_rate.epoch.env.math": 0.9720708446866485, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8275520317145689, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8681730923900357, "success_rate.epoch.global": 0.8989044781856621, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999047256097561, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.9791044776119403, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020895522388059702 }, { "epoch": 1.556242011077972, "grad_norm": 231.83040459248767, "learning_rate": 3.751304919823007e-07, "loss": 0.2871, "step": 7305, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8701298701298701, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.952, "success_rate.epoch.env.logic": 0.9167717528373266, "success_rate.epoch.env.math": 0.9720898570456092, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8264953040039545, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8681250116961702, "success_rate.epoch.global": 0.8985229234605793, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9972875916870416, "tokens_p.mean_below_band": 6.693881005048752e-10, "tokens_p.mean_in_band": 0.4787326388888889, "tokens_rate.above_band": 0.977299880525687, "tokens_rate.below_band": 0.0011947431302270011, "tokens_rate.in_band": 0.021505376344086023 }, { "epoch": 1.5573072006817212, "grad_norm": 78.69483288243262, "learning_rate": 3.750988759566542e-07, "loss": 0.1893, "step": 7310, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8701298701298701, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.952191235059761, "success_rate.epoch.env.logic": 0.9168765743073047, "success_rate.epoch.env.math": 0.9721467391304348, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8267522211253702, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8682554302255181, "success_rate.epoch.global": 0.8987172123300785, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982412316476346, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9991850040749797, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0008149959250203749 }, { "epoch": 1.558372390285471, "grad_norm": 122.75864728381704, "learning_rate": 3.7506724892705544e-07, "loss": 0.3226, "step": 7315, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8701298701298701, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9642857142857143, "success_rate.epoch.env.ded": 0.952191235059761, "success_rate.epoch.env.logic": 0.917189460476788, "success_rate.epoch.env.math": 0.9721845318860244, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8270935960591133, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8684385943329331, "success_rate.epoch.global": 0.8989107586470476, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998567335243553, "tokens_p.mean_in_band": 0.7763671875, "tokens_rate.above_band": 0.9886685552407932, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0113314447592068 }, { "epoch": 1.5594375798892202, "grad_norm": 56.34095357579713, "learning_rate": 3.7503561091655393e-07, "loss": 0.238, "step": 7320, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9525691699604744, "success_rate.epoch.env.logic": 0.917189460476788, "success_rate.epoch.env.math": 0.972241029113067, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8273487456960157, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8686894107569212, "success_rate.epoch.global": 0.899103566660309, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998546974522293, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.9993634627625716, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0006365372374283895 }, { "epoch": 1.5605027694929698, "grad_norm": 634.8912894277909, "learning_rate": 3.750039619482072e-07, "loss": 0.1986, "step": 7325, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.9173967459324155, "success_rate.epoch.env.math": 0.9723160027008778, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8276031434184676, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8687551736948508, "success_rate.epoch.global": 0.8992956405863316, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971968438538206, "tokens_p.mean_in_band": 0.7604166666666666, "tokens_rate.above_band": 0.9966887417218543, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0033112582781456954 }, { "epoch": 1.561567959096719, "grad_norm": 60.98056153603716, "learning_rate": 3.7497230204508085e-07, "loss": 0.2347, "step": 7330, "success_rate.epoch.env.abd": 0.9829787234042553, "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.9175, "success_rate.epoch.env.math": 0.9723905723905724, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8278567925453654, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688076808096401, "success_rate.epoch.global": 0.8994869846095382, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997874149659864, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5626331487004688, "grad_norm": 184.51817282588448, "learning_rate": 3.749406312302484e-07, "loss": 0.2173, "step": 7335, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9175, "success_rate.epoch.env.math": 0.9724091520861373, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8284457478005866, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688863108062705, "success_rate.epoch.global": 0.8996776028826096, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970067049808429, "tokens_p.mean_in_band": 0.6940104166666666, "tokens_rate.above_band": 0.9886363636363636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011363636363636364 }, { "epoch": 1.563698338304218, "grad_norm": 70.40206931706632, "learning_rate": 3.749089495267912e-07, "loss": 0.2263, "step": 7340, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8717948717948718, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9165628891656289, "success_rate.epoch.env.math": 0.9724462365591398, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8282088823816496, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688748618615882, "success_rate.epoch.global": 0.899488926746167, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981335324232082, "tokens_p.mean_in_band": 0.6076388888888888, "tokens_rate.above_band": 0.984873949579832, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015126050420168067 }, { "epoch": 1.5647635279079677, "grad_norm": 92.9873030626634, "learning_rate": 3.748772569577988e-07, "loss": 0.3012, "step": 7345, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8717948717948718, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.953307392996109, "success_rate.epoch.env.logic": 0.9168734491315137, "success_rate.epoch.env.math": 0.9725016767270288, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8278888347147733, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689141658462263, "success_rate.epoch.global": 0.899489892310599, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977964743589743, "tokens_p.mean_in_band": 0.513671875, "tokens_rate.above_band": 0.9915254237288136, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00847457627118644 }, { "epoch": 1.565828717511717, "grad_norm": 143.13321887750806, "learning_rate": 3.748455535463684e-07, "loss": 0.2566, "step": 7350, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8717948717948718, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9169764560099133, "success_rate.epoch.env.math": 0.9725935828877005, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8271665043816943, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688826714646724, "success_rate.epoch.global": 0.8993022817273242, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978966346153846, "tokens_p.mean_in_band": 0.501953125, "tokens_rate.above_band": 0.975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025 }, { "epoch": 1.5668939071154666, "grad_norm": 218.88817745450382, "learning_rate": 3.748138393156052e-07, "loss": 0.3032, "step": 7355, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8717948717948718, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9169764560099133, "success_rate.epoch.env.math": 0.9726666666666667, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8276699029126213, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.86893507894739, "success_rate.epoch.global": 0.8994918125352908, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948369565217391, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.9829059829059829, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017094017094017096 }, { "epoch": 1.567959096719216, "grad_norm": 92.73721293149022, "learning_rate": 3.7478211428862247e-07, "loss": 0.3906, "step": 7360, "success_rate.epoch.env.abd": 0.9831932773109243, "success_rate.epoch.env.agentgym:alfworld": 0.8726114649681529, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9159456118665018, "success_rate.epoch.env.math": 0.9727030625832224, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8279205041202132, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689546404026003, "success_rate.epoch.global": 0.8994927672365207, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978649068322981, "tokens_p.mean_in_band": 0.6637073863636364, "tokens_rate.above_band": 0.9669669669669669, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03303303303303303 }, { "epoch": 1.5690242863229655, "grad_norm": 80.54347956379502, "learning_rate": 3.74750378488541e-07, "loss": 0.2968, "step": 7365, "success_rate.epoch.env.abd": 0.9832635983263598, "success_rate.epoch.env.agentgym:alfworld": 0.8726114649681529, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9161528976572133, "success_rate.epoch.env.math": 0.9727393617021277, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8282535074987906, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690297759370149, "success_rate.epoch.global": 0.8996812300768798, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99925, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9973404255319149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0026595744680851063 }, { "epoch": 1.570089475926715, "grad_norm": 110.22776202569082, "learning_rate": 3.747186319384897e-07, "loss": 0.3747, "step": 7370, "success_rate.epoch.env.abd": 0.9832635983263598, "success_rate.epoch.env.agentgym:alfworld": 0.8726114649681529, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9163591635916359, "success_rate.epoch.env.math": 0.9727755644090306, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8281853281853282, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690639513309457, "success_rate.epoch.global": 0.899681826689126, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972181008902077, "tokens_p.mean_in_band": 0.4301215277777778, "tokens_rate.above_band": 0.9739884393063584, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02601156069364162 }, { "epoch": 1.5711546655304645, "grad_norm": 717.7661144961071, "learning_rate": 3.746868746616052e-07, "loss": 0.2865, "step": 7375, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8726114649681529, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9164619164619164, "success_rate.epoch.env.math": 0.9728296885354539, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8281174771304767, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690783841444755, "success_rate.epoch.global": 0.8996824210722959, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9927262931034483, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.9747899159663865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025210084033613446 }, { "epoch": 1.572219855134214, "grad_norm": 149.78207009058485, "learning_rate": 3.7465510668103204e-07, "loss": 0.1977, "step": 7380, "success_rate.epoch.env.abd": 0.983402489626556, "success_rate.epoch.env.agentgym:alfworld": 0.8734177215189873, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9165644171779141, "success_rate.epoch.env.math": 0.9728476821192052, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8285302593659942, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688398779003671, "success_rate.epoch.global": 0.8996830132388588, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997145061728395, "tokens_p.mean_in_band": 0.690185546875, "tokens_rate.above_band": 0.9619952494061758, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03800475059382423 }, { "epoch": 1.5732850447379634, "grad_norm": 76.44000974123742, "learning_rate": 3.7462332801992243e-07, "loss": 0.2206, "step": 7385, "success_rate.epoch.env.abd": 0.9834710743801653, "success_rate.epoch.env.agentgym:alfworld": 0.8734177215189873, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9167686658506732, "success_rate.epoch.env.math": 0.9728656518861681, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8284619070436032, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.868878220916286, "success_rate.epoch.global": 0.8996836032011911, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995, "tokens_p.mean_in_band": 0.345703125, "tokens_rate.above_band": 0.984251968503937, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015748031496062992 }, { "epoch": 1.5743502343417128, "grad_norm": 335.0100723795315, "learning_rate": 3.7459153870143644e-07, "loss": 0.2626, "step": 7390, "success_rate.epoch.env.abd": 0.9835390946502057, "success_rate.epoch.env.agentgym:alfworld": 0.8742138364779874, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9167686658506732, "success_rate.epoch.env.math": 0.9729015201586253, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8284758719541329, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689613089537428, "success_rate.epoch.global": 0.8996841909715771, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972967128027682, "tokens_p.mean_in_band": 0.5703125, "tokens_rate.above_band": 0.9796610169491525, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020338983050847456 }, { "epoch": 1.5754154239454623, "grad_norm": 285.8289423925264, "learning_rate": 3.745597387487419e-07, "loss": 0.2655, "step": 7395, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8742138364779874, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9156479217603912, "success_rate.epoch.env.math": 0.972937293729373, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8288030519790176, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689308277782871, "success_rate.epoch.global": 0.8996847765622102, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0004002988898377, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.9890202702702703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01097972972972973 }, { "epoch": 1.5764806135492118, "grad_norm": 93.15656509004323, "learning_rate": 3.7452792818501434e-07, "loss": 0.437, "step": 7400, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.86875, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9158536585365854, "success_rate.epoch.env.math": 0.972972972972973, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8285714285714286, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8684471219742456, "success_rate.epoch.global": 0.8995002776235425, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981343283582089, "tokens_p.mean_in_band": 0.4365234375, "tokens_rate.above_band": 0.9654178674351584, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0345821325648415 }, { "epoch": 1.5775458031529612, "grad_norm": 129.66763779012854, "learning_rate": 3.744961070334372e-07, "loss": 0.2338, "step": 7405, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.86875, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9159561510353228, "success_rate.epoch.env.math": 0.9730440499671269, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8288159771754636, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8684970536506534, "success_rate.epoch.global": 0.8996859412525402, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9939793577981652, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5786109927567107, "grad_norm": 54.17844040797932, "learning_rate": 3.744642753172014e-07, "loss": 0.3054, "step": 7410, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9159561510353228, "success_rate.epoch.env.math": 0.9730617608409987, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.828909952606635, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8686858108309903, "success_rate.epoch.global": 0.8996865203761756, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982506361323156, "tokens_p.mean_in_band": 0.427734375, "tokens_rate.above_band": 0.9899244332493703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010075566750629723 }, { "epoch": 1.5796761823604601, "grad_norm": 110.91168274500707, "learning_rate": 3.744324330595057e-07, "loss": 0.1285, "step": 7415, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9655172413793104, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.916058394160584, "success_rate.epoch.env.math": 0.9731675392670157, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8291528632276385, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8687268046648341, "success_rate.epoch.global": 0.8998711577397386, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.990234375, "tokens_p.mean_in_band": 0.8502604166666666, "tokens_rate.above_band": 0.9411764705882353, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058823529411764705 }, { "epoch": 1.5807413719642096, "grad_norm": 133.0249004341405, "learning_rate": 3.7440058028355646e-07, "loss": 0.3102, "step": 7420, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9543726235741445, "success_rate.epoch.env.logic": 0.9162621359223301, "success_rate.epoch.env.math": 0.973185088293002, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8293144208037825, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688465262073993, "success_rate.epoch.global": 0.9000551166636046, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994996360989811, "tokens_p.mean_in_band": 0.7265625, "tokens_rate.above_band": 0.9970972423802612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002902757619738752 }, { "epoch": 1.581806561567959, "grad_norm": 95.87916724368114, "learning_rate": 3.7436871701256784e-07, "loss": 0.3336, "step": 7425, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9543726235741445, "success_rate.epoch.env.logic": 0.9162621359223301, "success_rate.epoch.env.math": 0.9732375979112271, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8292452830188679, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689269852349321, "success_rate.epoch.global": 0.9000550155877498, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986929657794676, "tokens_p.mean_in_band": 0.390625, "tokens_rate.above_band": 0.9704797047970479, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02952029520295203 }, { "epoch": 1.5828717511717085, "grad_norm": 281.93536178156836, "learning_rate": 3.7433684326976145e-07, "loss": 0.2062, "step": 7430, "success_rate.epoch.env.abd": 0.9839357429718876, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9162621359223301, "success_rate.epoch.env.math": 0.9732550554468362, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8288669487541138, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689157788233064, "success_rate.epoch.global": 0.8998718652754897, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9960469374167776, "tokens_p.mean_in_band": 0.5223721590909091, "tokens_rate.above_band": 0.9855643044619422, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014435695538057743 }, { "epoch": 1.583936940775458, "grad_norm": 63.85553003750588, "learning_rate": 3.7430495907836675e-07, "loss": 0.193, "step": 7435, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9164648910411622, "success_rate.epoch.env.math": 0.9732899022801303, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8292682926829268, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689797063604928, "success_rate.epoch.global": 0.9000548145441257, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967105263157895, "tokens_p.mean_in_band": 0.7587890625, "tokens_rate.above_band": 0.9661016949152542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03389830508474576 }, { "epoch": 1.5850021303792075, "grad_norm": 102.56513791047378, "learning_rate": 3.742730644616207e-07, "loss": 0.3006, "step": 7440, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9664804469273743, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9733420026007802, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8285714285714286, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689721473166011, "success_rate.epoch.global": 0.8998723326645997, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666668, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999721975088968, "tokens_p.mean_in_band": 0.5302734375, "tokens_rate.above_band": 0.9859649122807017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014035087719298246 }, { "epoch": 1.586067319982957, "grad_norm": 114.09171612837113, "learning_rate": 3.742411594427678e-07, "loss": 0.2446, "step": 7445, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.916767189384801, "success_rate.epoch.env.math": 0.9733593242365172, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8286647992530346, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869008277750671, "success_rate.epoch.global": 0.8998725650828326, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968354430379747, "tokens_p.mean_in_band": 0.625, "tokens_rate.above_band": 0.9693251533742331, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03067484662576687 }, { "epoch": 1.5871325095867066, "grad_norm": 403.0613650343333, "learning_rate": 3.7420924404506027e-07, "loss": 0.2355, "step": 7450, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9157641395908543, "success_rate.epoch.env.math": 0.9734283862605314, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.828904428904429, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689451542853495, "success_rate.epoch.global": 0.8998727966563692, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994310793237972, "tokens_p.mean_in_band": 0.5661764705882353, "tokens_rate.above_band": 0.9576587795765878, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04234122042341221 }, { "epoch": 1.5881976991904558, "grad_norm": 75.75547823520138, "learning_rate": 3.7417731829175774e-07, "loss": 0.3218, "step": 7455, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9158653846153846, "success_rate.epoch.env.math": 0.9734799482535575, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8293023255813954, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690119602632712, "success_rate.epoch.global": 0.9000544168329403, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982850609756098, "tokens_p.mean_in_band": 0.69921875, "tokens_rate.above_band": 0.9704142011834319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029585798816568046 }, { "epoch": 1.5892628887942055, "grad_norm": 420.98657005573835, "learning_rate": 3.7414538220612756e-07, "loss": 0.2801, "step": 7460, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9159663865546218, "success_rate.epoch.env.math": 0.9735824742268041, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8294609665427509, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690603607893508, "success_rate.epoch.global": 0.9002353793228318, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962469362745098, "tokens_p.mean_in_band": 0.7981770833333334, "tokens_rate.above_band": 0.9855072463768116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014492753623188406 }, { "epoch": 1.5903280783979548, "grad_norm": 120.57693716068823, "learning_rate": 3.741134358114445e-07, "loss": 0.3244, "step": 7465, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9161676646706587, "success_rate.epoch.env.math": 0.9736673089274245, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8296983758700696, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691079537115306, "success_rate.epoch.global": 0.9004156876920296, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9908854166666666, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9795918367346939, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02040816326530612 }, { "epoch": 1.5913932680017044, "grad_norm": 146.17220641527368, "learning_rate": 3.7408147913099083e-07, "loss": 0.2916, "step": 7470, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.916267942583732, "success_rate.epoch.env.math": 0.9737010904425915, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8293246993524515, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690861703397688, "success_rate.epoch.global": 0.9002345300378857, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955778301886793, "tokens_p.mean_in_band": 0.563232421875, "tokens_rate.above_band": 0.9298245614035088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07017543859649122 }, { "epoch": 1.5924584576054537, "grad_norm": 81.85401089471598, "learning_rate": 3.740495121880563e-07, "loss": 0.2498, "step": 7475, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9163679808841099, "success_rate.epoch.env.math": 0.9737683941138836, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8294824399260629, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869211272053219, "success_rate.epoch.global": 0.9004141905276427, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974385245901639, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5935236472092034, "grad_norm": 202.3792501137584, "learning_rate": 3.7401753500593835e-07, "loss": 0.2716, "step": 7480, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9165673420738975, "success_rate.epoch.env.math": 0.9738353541799617, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8292570373788648, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8693127436464801, "success_rate.epoch.global": 0.9004134459823836, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991185897435897, "tokens_p.mean_in_band": 0.6848958333333334, "tokens_rate.above_band": 0.9774436090225563, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022556390977443608 }, { "epoch": 1.5945888368129526, "grad_norm": 89.87928124264751, "learning_rate": 3.7398554760794156e-07, "loss": 0.2647, "step": 7485, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9167657550535078, "success_rate.epoch.env.math": 0.9738687061822817, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8296500920810314, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869385922501501, "success_rate.epoch.global": 0.9005921406782702, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972074468085106, "tokens_p.mean_in_band": 0.8815104166666666, "tokens_rate.above_band": 0.9873949579831933, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012605042016806723 }, { "epoch": 1.5956540264167023, "grad_norm": 41.06241910836217, "learning_rate": 3.739535500173782e-07, "loss": 0.1696, "step": 7490, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9167657550535078, "success_rate.epoch.env.math": 0.9732824427480916, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8285845588235294, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8692730673868887, "success_rate.epoch.global": 0.9000537345513165, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.7833333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9973006644518272, "tokens_p.mean_in_band": 0.6354166666666666, "tokens_rate.above_band": 0.9435736677115988, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05642633228840126 }, { "epoch": 1.5967192160204515, "grad_norm": 105.36218028997493, "learning_rate": 3.7392154225756783e-07, "loss": 0.2389, "step": 7495, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9156769596199525, "success_rate.epoch.env.math": 0.9733333333333334, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8285976168652612, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691798994953814, "success_rate.epoch.global": 0.899874843554443, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954844006568144, "tokens_p.mean_in_band": 0.5321875, "tokens_rate.above_band": 0.9241274658573596, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07587253414264036 }, { "epoch": 1.5977844056242012, "grad_norm": 85.06000770032496, "learning_rate": 3.738895243518375e-07, "loss": 0.2327, "step": 7500, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9158767772511849, "success_rate.epoch.env.math": 0.973384030418251, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8289112534309241, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8692925281421515, "success_rate.epoch.global": 0.9000535427449581, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976173020527859, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5988495952279504, "grad_norm": 158.61518151707392, "learning_rate": 3.7385749632352165e-07, "loss": 0.3134, "step": 7505, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9159763313609467, "success_rate.epoch.env.math": 0.9734513274336283, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8291457286432161, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8693507398467285, "success_rate.epoch.global": 0.9002316052022091, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953785211267606, "tokens_p.mean_in_band": 0.7890625, "tokens_rate.above_band": 0.9726027397260274, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0273972602739726 }, { "epoch": 1.5999147848317001, "grad_norm": 47.171231342409065, "learning_rate": 3.738254581959621e-07, "loss": 0.124, "step": 7510, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9161747343565525, "success_rate.epoch.env.math": 0.9734848484848485, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8295350957155879, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8694230724863197, "success_rate.epoch.global": 0.900409034323315, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957298136645962, "tokens_p.mean_in_band": 0.71875, "tokens_rate.above_band": 0.9877300613496932, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012269938650306749 }, { "epoch": 1.6009799744354494, "grad_norm": 449.16224571290246, "learning_rate": 3.7379340999250794e-07, "loss": 0.3919, "step": 7515, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9516728624535316, "success_rate.epoch.env.logic": 0.9161747343565525, "success_rate.epoch.env.math": 0.9735516372795969, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8293903548680619, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691084116340897, "success_rate.epoch.global": 0.9002307828865613, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9913002329644729, "tokens_p.mean_below_band": 2.60770320892334e-07, "tokens_p.mean_in_band": 0.47299038951120165, "tokens_rate.above_band": 0.7769230769230769, "tokens_rate.below_band": 0.0009049773755656109, "tokens_rate.in_band": 0.22217194570135745 }, { "epoch": 1.602045164039199, "grad_norm": 112.49181236949589, "learning_rate": 3.737613517365157e-07, "loss": 0.2804, "step": 7520, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9516728624535316, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9736015084852294, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8290131878126421, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691233785850287, "success_rate.epoch.global": 0.9002303739145845, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978298611111112, "tokens_p.mean_in_band": 0.5982142857142857, "tokens_rate.above_band": 0.976271186440678, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023728813559322035 }, { "epoch": 1.6031103536429483, "grad_norm": 64.43560988540021, "learning_rate": 3.737292834513492e-07, "loss": 0.3589, "step": 7525, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9167643610785463, "success_rate.epoch.env.math": 0.9736346516007532, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8294784580498866, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691938419634802, "success_rate.epoch.global": 0.9004068636122413, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985865290068829, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9980372914622179, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001962708537782139 }, { "epoch": 1.604175543246698, "grad_norm": 308.0946203409165, "learning_rate": 3.736972051603796e-07, "loss": 0.2861, "step": 7530, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9169590643274854, "success_rate.epoch.env.math": 0.9736677115987461, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8293345405160706, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688571116839564, "success_rate.epoch.global": 0.9002295603037259, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950336185819071, "tokens_p.mean_in_band": 0.6694078947368421, "tokens_rate.above_band": 0.955607476635514, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04439252336448598 }, { "epoch": 1.6052407328504472, "grad_norm": 43.38084235055817, "learning_rate": 3.7366511688698527e-07, "loss": 0.1036, "step": 7535, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9170560747663551, "success_rate.epoch.env.math": 0.9737171464330413, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8297968397291197, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.868912452091794, "success_rate.epoch.global": 0.900405429226159, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961993243243243, "tokens_p.mean_in_band": 0.865234375, "tokens_rate.above_band": 0.9736842105263158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02631578947368421 }, { "epoch": 1.606305922454197, "grad_norm": 142.03121451815522, "learning_rate": 3.73633018654552e-07, "loss": 0.3655, "step": 7540, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.975, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9171528588098017, "success_rate.epoch.env.math": 0.97375, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8293561458802341, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.868942449310207, "success_rate.epoch.global": 0.9002287524194967, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957236842105263, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9313725490196079, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06862745098039216 }, { "epoch": 1.6073711120579464, "grad_norm": 281.4151639779487, "learning_rate": 3.7360091048647265e-07, "loss": 0.5624, "step": 7545, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9152148664343787, "success_rate.epoch.env.math": 0.9737827715355806, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.828982898289829, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.868812088934743, "success_rate.epoch.global": 0.8998770419813806, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9997176204819277, "tokens_p.mean_in_band": 0.4446428571428571, "tokens_rate.above_band": 0.9743213499633162, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025678650036683785 }, { "epoch": 1.6084363016616958, "grad_norm": 66.56732194583225, "learning_rate": 3.735687924061476e-07, "loss": 0.2741, "step": 7550, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9520295202952029, "success_rate.epoch.env.logic": 0.9153132250580046, "success_rate.epoch.env.math": 0.9738805970149254, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8290598290598291, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688530692362271, "success_rate.epoch.global": 0.9000350754121361, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998926116838488, "tokens_p.mean_in_band": 0.7044270833333334, "tokens_rate.above_band": 0.9948717948717949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005128205128205128 }, { "epoch": 1.6095014912654453, "grad_norm": 319.7704207125596, "learning_rate": 3.73536664436984e-07, "loss": 0.342, "step": 7555, "success_rate.epoch.env.abd": 0.984313725490196, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9520295202952029, "success_rate.epoch.env.logic": 0.9154113557358053, "success_rate.epoch.env.math": 0.9739454094292804, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8293668612483162, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.868901408533218, "success_rate.epoch.global": 0.9002100840336135, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.7022569444444444, "tokens_rate.above_band": 0.9166666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08333333333333333 }, { "epoch": 1.6105666808691947, "grad_norm": 0.0, "learning_rate": 3.7350452660239666e-07, "loss": 0.1687, "step": 7560, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8711656441717791, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9522058823529411, "success_rate.epoch.env.logic": 0.9155092592592593, "success_rate.epoch.env.math": 0.9739615623062616, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8297491039426523, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690404275753746, "success_rate.epoch.global": 0.9003844809507165, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992517605633803, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9985935302390999, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0014064697609001407 }, { "epoch": 1.6116318704729442, "grad_norm": 29.96036182473792, "learning_rate": 3.7347237892580745e-07, "loss": 0.3907, "step": 7565, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8711656441717791, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9157043879907621, "success_rate.epoch.env.math": 0.9740420271940667, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8299015219337511, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690952531788674, "success_rate.epoch.global": 0.9005582693649686, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970703125, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.9696969696969697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030303030303030304 }, { "epoch": 1.6126970600766937, "grad_norm": 114.037020582343, "learning_rate": 3.7344022143064526e-07, "loss": 0.2688, "step": 7570, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9158016147635525, "success_rate.epoch.env.math": 0.974090067859346, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8289414917373827, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690925997772415, "success_rate.epoch.global": 0.900208986415883, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9957078313253012, "tokens_p.mean_in_band": 0.6019736842105263, "tokens_rate.above_band": 0.956221198156682, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04377880184331797 }, { "epoch": 1.6137622496804431, "grad_norm": 225.17243415463471, "learning_rate": 3.734080541403463e-07, "loss": 0.2769, "step": 7575, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9158986175115207, "success_rate.epoch.env.math": 0.974169741697417, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8291703835860839, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691822623706645, "success_rate.epoch.global": 0.9003824756606398, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978658536585366, "tokens_p.mean_in_band": 0.771875, "tokens_rate.above_band": 0.9761904761904762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023809523809523808 }, { "epoch": 1.6148274392841926, "grad_norm": 67.9033884536374, "learning_rate": 3.7337587707835383e-07, "loss": 0.1484, "step": 7580, "success_rate.epoch.env.abd": 0.9844357976653697, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9158986175115207, "success_rate.epoch.env.math": 0.9742173112338858, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8295505117935025, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8692421863060793, "success_rate.epoch.global": 0.9005553627212773, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972615979381443, "tokens_p.mean_in_band": 0.709375, "tokens_rate.above_band": 0.9748743718592965, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02512562814070352 }, { "epoch": 1.615892628887942, "grad_norm": 77.25770649901607, "learning_rate": 3.7334369026811825e-07, "loss": 0.3723, "step": 7585, "success_rate.epoch.env.abd": 0.9844961240310077, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9160919540229885, "success_rate.epoch.env.math": 0.9742804654011022, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8297777777777777, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8692916483995884, "success_rate.epoch.global": 0.9007276507276507, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9899553571428571, "tokens_p.mean_in_band": 0.7907366071428571, "tokens_rate.above_band": 0.9411764705882353, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058823529411764705 }, { "epoch": 1.6169578184916915, "grad_norm": 321.43578582718186, "learning_rate": 3.73311493733097e-07, "loss": 0.2626, "step": 7590, "success_rate.epoch.env.abd": 0.9845559845559846, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9152348224513173, "success_rate.epoch.env.math": 0.9737324373854612, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8299289520426287, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869183091599817, "success_rate.epoch.global": 0.9005534417156693, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0009995791245792, "tokens_p.mean_in_band": 0.45590277777777777, "tokens_rate.above_band": 0.9295774647887324, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07042253521126761 }, { "epoch": 1.618023008095441, "grad_norm": 80.81922636871866, "learning_rate": 3.7327928749675494e-07, "loss": 0.2884, "step": 7595, "success_rate.epoch.env.abd": 0.9845559845559846, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9152348224513173, "success_rate.epoch.env.math": 0.973780487804878, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8300132802124834, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691951260170234, "success_rate.epoch.global": 0.9005524861878453, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895833333333334, "tokens_p.mean_in_band": 0.6744791666666666, "tokens_rate.above_band": 0.9166666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08333333333333333 }, { "epoch": 1.6190881976991904, "grad_norm": 102.82476991420386, "learning_rate": 3.732470715825635e-07, "loss": 0.2456, "step": 7600, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.9155251141552512, "success_rate.epoch.env.math": 0.973780487804878, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8303886925795053, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869276843860201, "success_rate.epoch.global": 0.9007238883143743, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959914921465969, "tokens_p.mean_in_band": 0.6702008928571429, "tokens_rate.above_band": 0.9646464646464646, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03535353535353535 }, { "epoch": 1.62015338730294, "grad_norm": 180.67783720273331, "learning_rate": 3.732148460140015e-07, "loss": 0.2127, "step": 7605, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.9157175398633257, "success_rate.epoch.env.math": 0.9732360097323601, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8301720335244817, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8692404939012484, "success_rate.epoch.global": 0.9005505849965588, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9931506849315068, "tokens_p.mean_in_band": 0.7346354166666667, "tokens_rate.above_band": 0.906832298136646, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09316770186335403 }, { "epoch": 1.6212185769066894, "grad_norm": 40.51645321487268, "learning_rate": 3.7318261081455464e-07, "loss": 0.2116, "step": 7610, "success_rate.epoch.env.abd": 0.9847908745247148, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.9158134243458476, "success_rate.epoch.env.math": 0.9732522796352584, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8305457746478874, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8693006198483534, "success_rate.epoch.global": 0.900721401580213, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998686974789916, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.967479674796748, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032520325203252036 }, { "epoch": 1.6222837665104388, "grad_norm": 62.28192926403548, "learning_rate": 3.731503660077158e-07, "loss": 0.2969, "step": 7615, "success_rate.epoch.env.abd": 0.9847908745247148, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9528985507246377, "success_rate.epoch.env.logic": 0.9159090909090909, "success_rate.epoch.env.math": 0.9733171619163129, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8306948109058927, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8693752085464449, "success_rate.epoch.global": 0.9008916323731139, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986694868995634, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9989094874591058, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0010905125408942203 }, { "epoch": 1.6233489561141883, "grad_norm": 156.81930557595388, "learning_rate": 3.731181116169847e-07, "loss": 0.2337, "step": 7620, "success_rate.epoch.env.abd": 0.9849056603773585, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9160045402951191, "success_rate.epoch.env.math": 0.9733494851605088, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8305531167690957, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690716446715308, "success_rate.epoch.global": 0.9007189318726464, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9934225512528474, "tokens_p.mean_in_band": 0.6259072580645161, "tokens_rate.above_band": 0.9340425531914893, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06595744680851064 }, { "epoch": 1.6244141457179377, "grad_norm": 690.799236158285, "learning_rate": 3.7308584766586815e-07, "loss": 0.2974, "step": 7625, "success_rate.epoch.env.abd": 0.9850187265917603, "success_rate.epoch.env.agentgym:alfworld": 0.8719512195121951, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9160997732426304, "success_rate.epoch.env.math": 0.9734138972809667, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8307759754493643, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691166964863161, "success_rate.epoch.global": 0.9008885850991114, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976380813953488, "tokens_p.mean_in_band": 0.7975260416666666, "tokens_rate.above_band": 0.9662921348314607, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033707865168539325 }, { "epoch": 1.6254793353216872, "grad_norm": 153.06846157304767, "learning_rate": 3.7305357417787985e-07, "loss": 0.2226, "step": 7630, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8674698795180723, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9160997732426304, "success_rate.epoch.env.math": 0.9734779987944545, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8308501314636284, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8687370034069576, "success_rate.epoch.global": 0.9008870692596384, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974904397705545, "tokens_p.mean_in_band": 0.654296875, "tokens_rate.above_band": 0.9942965779467681, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005703422053231939 }, { "epoch": 1.6265445249254369, "grad_norm": 45.31779472198913, "learning_rate": 3.7302129117654047e-07, "loss": 0.2428, "step": 7635, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9160997732426304, "success_rate.epoch.env.math": 0.9729241877256317, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8312937062937062, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8687991267976006, "success_rate.epoch.global": 0.9008855585831063, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997327302631579, "tokens_p.mean_in_band": 0.7664930555555556, "tokens_rate.above_band": 0.9712460063897763, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02875399361022364 }, { "epoch": 1.6276097145291861, "grad_norm": 100.9174475086996, "learning_rate": 3.729889986853777e-07, "loss": 0.2554, "step": 7640, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9161947904869762, "success_rate.epoch.env.math": 0.9729567307692307, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8312254688181422, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688360778549302, "success_rate.epoch.global": 0.9008840530431826, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984038978494624, "tokens_p.mean_in_band": 0.4518229166666667, "tokens_rate.above_band": 0.9841269841269841, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015873015873015872 }, { "epoch": 1.6286749041329358, "grad_norm": 367.9406803345278, "learning_rate": 3.72956696727926e-07, "loss": 0.2885, "step": 7645, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9161947904869762, "success_rate.epoch.env.math": 0.9730215827338129, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8315926892950392, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.868890231037378, "success_rate.epoch.global": 0.9010522742701969, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9916213768115942, "tokens_p.mean_in_band": 0.7005208333333334, "tokens_rate.above_band": 0.9787234042553191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02127659574468085 }, { "epoch": 1.629740093736685, "grad_norm": 231.5486625392195, "learning_rate": 3.729243853277268e-07, "loss": 0.4174, "step": 7650, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9153498871331829, "success_rate.epoch.env.math": 0.9730215827338129, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8315972222222222, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.868905475954607, "success_rate.epoch.global": 0.9008810572687225, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9932565789473684, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.8715596330275229, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12844036697247707 }, { "epoch": 1.6308052833404347, "grad_norm": 150.55881028102985, "learning_rate": 3.728920645083285e-07, "loss": 0.1494, "step": 7655, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9153498871331829, "success_rate.epoch.env.math": 0.9730700179533214, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8316017316017316, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689266982600343, "success_rate.epoch.global": 0.9008795669824087, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941821808510638, "tokens_p.mean_in_band": 0.610546875, "tokens_rate.above_band": 0.9740932642487047, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025906735751295335 }, { "epoch": 1.631870472944184, "grad_norm": 148.96900970374492, "learning_rate": 3.728597342932862e-07, "loss": 0.3319, "step": 7660, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9153498871331829, "success_rate.epoch.env.math": 0.9731182795698925, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8320379965457686, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8689854658654027, "success_rate.epoch.global": 0.901046943600135, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.994758064516129, "tokens_p.mean_in_band": 0.605078125, "tokens_rate.above_band": 0.96875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03125 }, { "epoch": 1.6329356625479337, "grad_norm": 162.08260057598574, "learning_rate": 3.72827394706162e-07, "loss": 0.4404, "step": 7665, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.915445321307779, "success_rate.epoch.env.math": 0.9731503579952268, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8319689788884101, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8686716567533407, "success_rate.epoch.global": 0.9008766014834795, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9959935897435898, "tokens_p.mean_in_band": 0.6919921875, "tokens_rate.above_band": 0.9122807017543859, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08771929824561403 }, { "epoch": 1.634000852151683, "grad_norm": 342.6591660322463, "learning_rate": 3.7279504577052467e-07, "loss": 0.2027, "step": 7670, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8690476190476191, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9146067415730337, "success_rate.epoch.env.math": 0.9732142857142857, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8321136461472234, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8686856714112366, "success_rate.epoch.global": 0.9008751262201279, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0009722222222222, "tokens_p.mean_in_band": 0.4822048611111111, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 1.6350660417554326, "grad_norm": 81.46516230391956, "learning_rate": 3.727626875099499e-07, "loss": 0.1922, "step": 7675, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8698224852071006, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9147025813692481, "success_rate.epoch.env.math": 0.9732461355529132, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.8324742268041238, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8684949252215465, "success_rate.epoch.global": 0.9008736559139785, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963490099009901, "tokens_p.mean_in_band": 0.6461397058823529, "tokens_rate.above_band": 0.9674329501915708, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032567049808429116 }, { "epoch": 1.6361312313591818, "grad_norm": 92.70969757869626, "learning_rate": 3.727303199480203e-07, "loss": 0.4116, "step": 7680, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9147025813692481, "success_rate.epoch.env.math": 0.9732620320855615, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.8316916488222698, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8685111325040324, "success_rate.epoch.global": 0.9005367326400536, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.8928571428571428, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9967640532544378, "tokens_p.mean_in_band": 0.412109375, "tokens_rate.above_band": 0.9811320754716981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018867924528301886 }, { "epoch": 1.6371964209629315, "grad_norm": 196.54618252657176, "learning_rate": 3.7269794310832487e-07, "loss": 0.3494, "step": 7685, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9501779359430605, "success_rate.epoch.env.logic": 0.9149888143176734, "success_rate.epoch.env.math": 0.9733096085409253, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.8319076133447391, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8685772879466978, "success_rate.epoch.global": 0.9007032819825854, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997125, "tokens_p.mean_in_band": 0.55078125, "tokens_rate.above_band": 0.998003992015968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001996007984031936 }, { "epoch": 1.6382616105666807, "grad_norm": 65.03341026716015, "learning_rate": 3.726655570144599e-07, "loss": 0.2486, "step": 7690, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.950530035335689, "success_rate.epoch.env.logic": 0.9140625, "success_rate.epoch.env.math": 0.9733412322274881, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.8316958564715934, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8685947989999108, "success_rate.epoch.global": 0.9005349381477766, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961412535079514, "tokens_p.mean_in_band": 0.5925071022727273, "tokens_rate.above_band": 0.9798350137488543, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02016498625114574 }, { "epoch": 1.6393268001704304, "grad_norm": 103.85645798224259, "learning_rate": 3.7263316169002793e-07, "loss": 0.2125, "step": 7695, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8713450292397661, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9507042253521126, "success_rate.epoch.env.logic": 0.9141583054626533, "success_rate.epoch.env.math": 0.9733570159857904, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8320545609548167, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8709588401153131, "success_rate.epoch.global": 0.9007009345794392, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983407079646017, "tokens_p.mean_in_band": 0.8033854166666666, "tokens_rate.above_band": 0.9947183098591549, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00528169014084507 }, { "epoch": 1.6403919897741797, "grad_norm": 62.121610755628375, "learning_rate": 3.726007571586385e-07, "loss": 0.1938, "step": 7700, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.914349276974416, "success_rate.epoch.env.math": 0.9733885274985216, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8322690506598552, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710872218993547, "success_rate.epoch.global": 0.900866377874042, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9998265815760267, "tokens_p.mean_in_band": 0.734375, "tokens_rate.above_band": 0.9988913525498891, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0011086474501108647 }, { "epoch": 1.6414571793779293, "grad_norm": 275.0158453149495, "learning_rate": 3.7256834344390776e-07, "loss": 0.2968, "step": 7705, "success_rate.epoch.env.abd": 0.9853479853479854, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.9147286821705426, "success_rate.epoch.env.math": 0.973435655253837, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8324117396852403, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.871143866415514, "success_rate.epoch.global": 0.9010312707917498, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982930672268907, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9916666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008333333333333333 }, { "epoch": 1.6425223689816786, "grad_norm": 71.57081103326922, "learning_rate": 3.725359205694587e-07, "loss": 0.3018, "step": 7710, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.9150110375275938, "success_rate.epoch.env.math": 0.972877358490566, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.832625318606627, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8711576264038056, "success_rate.epoch.global": 0.9010295582862836, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967277486910995, "tokens_p.mean_in_band": 0.68115234375, "tokens_rate.above_band": 0.9794871794871794, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020512820512820513 }, { "epoch": 1.6435875585854283, "grad_norm": 95.39845993364986, "learning_rate": 3.725034885589208e-07, "loss": 0.318, "step": 7715, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.951048951048951, "success_rate.epoch.env.logic": 0.9151047409040793, "success_rate.epoch.env.math": 0.9729252501471454, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8329800763035184, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712183637489375, "success_rate.epoch.global": 0.9011936339522546, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9903980446927374, "tokens_p.mean_in_band": 0.869140625, "tokens_rate.above_band": 0.988950276243094, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011049723756906077 }, { "epoch": 1.6446527481891777, "grad_norm": 329.82682850415637, "learning_rate": 3.7247104743593026e-07, "loss": 0.3167, "step": 7720, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9152915291529153, "success_rate.epoch.env.math": 0.9729570840681951, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8328396106644097, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712553930179613, "success_rate.epoch.global": 0.9011916583912611, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992129629629629, "tokens_p.mean_in_band": 0.6227678571428571, "tokens_rate.above_band": 0.9897360703812317, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010263929618768328 }, { "epoch": 1.6457179377929272, "grad_norm": 18.034470361736645, "learning_rate": 3.7243859722413e-07, "loss": 0.0787, "step": 7725, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9153846153846154, "success_rate.epoch.env.math": 0.9730363423212193, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8329103214890017, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713013773546326, "success_rate.epoch.global": 0.9013549239920687, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964689265536724, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.6467831273966766, "grad_norm": 119.38978016918082, "learning_rate": 3.7240613794716945e-07, "loss": 0.4119, "step": 7730, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9155701754385965, "success_rate.epoch.env.math": 0.9730679156908665, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8333333333333334, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713595723789925, "success_rate.epoch.global": 0.9015176509402837, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9910239361702128, "tokens_p.mean_in_band": 0.6832682291666666, "tokens_rate.above_band": 0.8867924528301887, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11320754716981132 }, { "epoch": 1.647848317000426, "grad_norm": 187.38384494249783, "learning_rate": 3.723736696287047e-07, "loss": 0.2842, "step": 7735, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9145673603504929, "success_rate.epoch.env.math": 0.973115137346581, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8336842105263158, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870846325435917, "success_rate.epoch.global": 0.9013504611330698, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979801829268292, "tokens_p.mean_in_band": 0.564453125, "tokens_rate.above_band": 0.9613130128956624, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038686987104337635 }, { "epoch": 1.6489135066041756, "grad_norm": 56.74886303063964, "learning_rate": 3.723411922923985e-07, "loss": 0.2978, "step": 7740, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9136612021857924, "success_rate.epoch.env.math": 0.9732090856144437, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8337542087542088, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708291889074705, "success_rate.epoch.global": 0.9013482407102926, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962518740629686, "tokens_p.mean_in_band": 0.4126953125, "tokens_rate.above_band": 0.9569583931133429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043041606886657105 }, { "epoch": 1.649978696207925, "grad_norm": 394.70224775920286, "learning_rate": 3.7230870596192e-07, "loss": 0.332, "step": 7745, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9136612021857924, "success_rate.epoch.env.math": 0.9726902963393376, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8337531486146096, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707819298697701, "success_rate.epoch.global": 0.9011818778726198, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7916666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9930862831858407, "tokens_p.mean_in_band": 0.455078125, "tokens_rate.above_band": 0.904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.096 }, { "epoch": 1.6510438858116745, "grad_norm": 334.73305344806573, "learning_rate": 3.7227621066094506e-07, "loss": 0.3101, "step": 7750, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.867816091954023, "success_rate.epoch.env.agentgym:sciworld": 0.9695431472081218, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9138495092693566, "success_rate.epoch.env.math": 0.9727061556329849, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8336127409891031, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708713134779903, "success_rate.epoch.global": 0.9011799410029498, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991359447004609, "tokens_p.mean_in_band": 0.6497802734375, "tokens_rate.above_band": 0.9938931297709923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0061068702290076335 }, { "epoch": 1.652109075415424, "grad_norm": 276.7578517848306, "learning_rate": 3.72243706413156e-07, "loss": 0.175, "step": 7755, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8693181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9138495092693566, "success_rate.epoch.env.math": 0.9727694090382387, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.833821682712432, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710465959763166, "success_rate.epoch.global": 0.9013416230366492, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991508152173914, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9986431478968792, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0013568521031207597 }, { "epoch": 1.6531742650191734, "grad_norm": 115.57766909779146, "learning_rate": 3.7221119324224174e-07, "loss": 0.339, "step": 7760, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8693181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9131378935939196, "success_rate.epoch.env.math": 0.9722382880277617, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8339606859054789, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8709600998891638, "success_rate.epoch.global": 0.9011760862463247, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9966755319148937, "tokens_p.mean_in_band": 0.5173611111111112, "tokens_rate.above_band": 0.9543147208121827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04568527918781726 }, { "epoch": 1.6542394546229229, "grad_norm": 67.82084549987316, "learning_rate": 3.7217867117189754e-07, "loss": 0.3183, "step": 7765, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8700564971751412, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.913232104121475, "success_rate.epoch.env.math": 0.9722382880277617, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.833959115561118, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8682101300922476, "success_rate.epoch.global": 0.9010110893672537, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.996662265258216, "tokens_p.mean_in_band": 0.6405222039473685, "tokens_rate.above_band": 0.9573033707865168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04269662921348315 }, { "epoch": 1.6553046442266723, "grad_norm": 63.4554497851929, "learning_rate": 3.721461402258253e-07, "loss": 0.2909, "step": 7770, "success_rate.epoch.env.abd": 0.9855595667870036, "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.9133261105092091, "success_rate.epoch.env.math": 0.9722703639514731, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8338192419825073, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8679387478257078, "success_rate.epoch.global": 0.9008466297622925, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981711195928753, "tokens_p.mean_in_band": 0.6474609375, "tokens_rate.above_band": 0.960880195599022, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039119804400977995 }, { "epoch": 1.6563698338304218, "grad_norm": 96.70753254800114, "learning_rate": 3.721136004277334e-07, "loss": 0.2652, "step": 7775, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.9136069114470843, "success_rate.epoch.env.math": 0.972318339100346, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8336106489184693, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8679543957372601, "success_rate.epoch.global": 0.9008452535760728, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969429347826086, "tokens_p.mean_in_band": 0.26953125, "tokens_rate.above_band": 0.968421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031578947368421054 }, { "epoch": 1.6574350234341713, "grad_norm": 104.1371359488132, "learning_rate": 3.7208105180133656e-07, "loss": 0.295, "step": 7780, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9137001078748651, "success_rate.epoch.env.math": 0.9723820483314154, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8338870431893688, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8680090779614843, "success_rate.epoch.global": 0.9010061668289516, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9963942307692307, "tokens_p.mean_in_band": 0.755859375, "tokens_rate.above_band": 0.9811320754716981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018867924528301886 }, { "epoch": 1.6585002130379207, "grad_norm": 45.06907586218594, "learning_rate": 3.7204849437035593e-07, "loss": 0.4137, "step": 7785, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8715083798882681, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9138858988159311, "success_rate.epoch.env.math": 0.9724296381390006, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8341625207296849, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8681209617181481, "success_rate.epoch.global": 0.9011665586519767, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956550802139037, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9894179894179894, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010582010582010581 }, { "epoch": 1.6595654026416702, "grad_norm": 1030.1108191459152, "learning_rate": 3.720159281585192e-07, "loss": 0.2991, "step": 7790, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8715083798882681, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9139784946236559, "success_rate.epoch.env.math": 0.971919770773639, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8340231788079471, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.868084065380827, "success_rate.epoch.global": 0.9010029116790682, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973591549295775, "tokens_p.mean_in_band": 0.611328125, "tokens_rate.above_band": 0.9466666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05333333333333334 }, { "epoch": 1.6606305922454196, "grad_norm": 142.16290615659017, "learning_rate": 3.7198335318956043e-07, "loss": 0.2497, "step": 7795, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9139784946236559, "success_rate.epoch.env.math": 0.9714448886350657, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8342290202563043, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8681245018938945, "success_rate.epoch.global": 0.901001291989664, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967105263157895, "tokens_p.mean_in_band": 0.6663411458333334, "tokens_rate.above_band": 0.9858490566037735, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014150943396226415 }, { "epoch": 1.661695781849169, "grad_norm": 97.74415695452171, "learning_rate": 3.7195076948721994e-07, "loss": 0.5769, "step": 7800, "success_rate.epoch.env.abd": 0.985663082437276, "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9517241379310345, "success_rate.epoch.env.logic": 0.9130901287553648, "success_rate.epoch.env.math": 0.9714611872146118, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8345709570957096, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8680961824122579, "success_rate.epoch.global": 0.9009996775233795, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962789415656009, "tokens_p.mean_in_band": 0.5932291666666667, "tokens_rate.above_band": 0.967982924226254, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032017075773746 }, { "epoch": 1.6627609714529186, "grad_norm": 232.95073519629534, "learning_rate": 3.719181770752445e-07, "loss": 0.191, "step": 7805, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9517241379310345, "success_rate.epoch.env.logic": 0.9132762312633833, "success_rate.epoch.env.math": 0.9715261958997722, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8343634116192831, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8681047977753269, "success_rate.epoch.global": 0.9009980682549903, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959239130434783, "tokens_p.mean_in_band": 0.4125, "tokens_rate.above_band": 0.9484536082474226, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05154639175257732 }, { "epoch": 1.6638261610566683, "grad_norm": 143.5885360203726, "learning_rate": 3.7188557597738726e-07, "loss": 0.2483, "step": 7810, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9518900343642611, "success_rate.epoch.env.logic": 0.9134615384615384, "success_rate.epoch.env.math": 0.9715909090909091, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8344997941539728, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8682191844962877, "success_rate.epoch.global": 0.9011571841851495, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992260061919505, "tokens_p.mean_in_band": 0.8138020833333334, "tokens_rate.above_band": 0.9953775038520801, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004622496147919877 }, { "epoch": 1.6648913506604175, "grad_norm": 104.78913253365752, "learning_rate": 3.718529662174077e-07, "loss": 0.1087, "step": 7815, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.952054794520548, "success_rate.epoch.env.logic": 0.9124866595517609, "success_rate.epoch.env.math": 0.9716231555051078, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8348397699260477, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8681839974302562, "success_rate.epoch.global": 0.9011553273427471, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990706319702602, "tokens_p.mean_in_band": 0.6966145833333334, "tokens_rate.above_band": 0.9889705882352942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011029411764705883 }, { "epoch": 1.6659565402641672, "grad_norm": 109.81806305288673, "learning_rate": 3.7182034781907153e-07, "loss": 0.3105, "step": 7820, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8736263736263736, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9116080937167199, "success_rate.epoch.env.math": 0.9716713881019831, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8350430857611818, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8682053442431059, "success_rate.epoch.global": 0.9011534764498558, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961106115107914, "tokens_p.mean_in_band": 0.7075892857142857, "tokens_rate.above_band": 0.99002849002849, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009971509971509971 }, { "epoch": 1.6670217298679164, "grad_norm": 174.628180767647, "learning_rate": 3.717877208061508e-07, "loss": 0.1941, "step": 7825, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8743169398907104, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9116080937167199, "success_rate.epoch.env.math": 0.9717194570135747, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.834903727980336, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8683544166725216, "success_rate.epoch.global": 0.9011516314779271, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991020114942529, "tokens_p.mean_below_band": 4.1443854570388794e-08, "tokens_rate.above_band": 0.997134670487106, "tokens_rate.below_band": 0.0028653295128939827, "tokens_rate.in_band": 0.0 }, { "epoch": 1.668086919471666, "grad_norm": 135.3204835471677, "learning_rate": 3.7175508520242383e-07, "loss": 0.4867, "step": 7830, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8743169398907104, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9116080937167199, "success_rate.epoch.env.math": 0.9717992103778906, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8343558282208589, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8683252920885639, "success_rate.epoch.global": 0.900990099009901, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_in_band": 0.5114182692307693, "tokens_rate.above_band": 0.91875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08125 }, { "epoch": 1.6691521090754153, "grad_norm": 52.76944006340344, "learning_rate": 3.717224410316753e-07, "loss": 0.3032, "step": 7835, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9117021276595745, "success_rate.epoch.env.math": 0.9719101123595506, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8344235486508585, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8684121754034553, "success_rate.epoch.global": 0.9011479591836735, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989795918367347, "tokens_p.mean_in_band": 0.60546875, "tokens_rate.above_band": 0.9919028340080972, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008097165991902834 }, { "epoch": 1.670217298679165, "grad_norm": 103.25510566671726, "learning_rate": 3.7168978831769595e-07, "loss": 0.1262, "step": 7840, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9117959617428267, "success_rate.epoch.env.math": 0.9719730941704036, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8343533251733987, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.868420047441332, "success_rate.epoch.global": 0.9011461318051576, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916424418604651, "tokens_p.mean_in_band": 0.470703125, "tokens_rate.above_band": 0.9148936170212766, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0851063829787234 }, { "epoch": 1.6712824882829143, "grad_norm": 76.9395039492631, "learning_rate": 3.716571270842828e-07, "loss": 0.2373, "step": 7845, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9110169491525424, "success_rate.epoch.env.math": 0.9719887955182073, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.8345558272208639, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.868073851571861, "success_rate.epoch.global": 0.9009853782581055, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977678571428571, "tokens_p.mean_in_band": 0.5089142628205128, "tokens_rate.above_band": 0.965938864628821, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03406113537117904 }, { "epoch": 1.672347677886664, "grad_norm": 57.945199572414275, "learning_rate": 3.7162445735523933e-07, "loss": 0.2576, "step": 7850, "success_rate.epoch.env.abd": 0.9858156028368794, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9428571428571428, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9111111111111111, "success_rate.epoch.env.math": 0.9720357941834452, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.8347578347578347, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8656033037996725, "success_rate.epoch.global": 0.900983814662012, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967512376237624, "tokens_p.mean_in_band": 0.76220703125, "tokens_rate.above_band": 0.9901960784313726, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00980392156862745 }, { "epoch": 1.6734128674904132, "grad_norm": 84.09873849250378, "learning_rate": 3.715917791543748e-07, "loss": 0.3459, "step": 7855, "success_rate.epoch.env.abd": 0.9858156028368794, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9428571428571428, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9113924050632911, "success_rate.epoch.env.math": 0.9720826353992184, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8345528455284553, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8653077880519706, "success_rate.epoch.global": 0.9008238276299113, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9949324324324325, "tokens_p.mean_in_band": 0.6822916666666666, "tokens_rate.above_band": 0.8457142857142858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15428571428571428 }, { "epoch": 1.6744780570941629, "grad_norm": 84.71408702788854, "learning_rate": 3.7155909250550494e-07, "loss": 0.2583, "step": 7860, "success_rate.epoch.env.abd": 0.9858657243816255, "success_rate.epoch.env.agentgym:alfworld": 0.8763440860215054, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9428571428571428, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9104320337197049, "success_rate.epoch.env.math": 0.9720982142857143, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8344155844155844, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8653493371007628, "success_rate.epoch.global": 0.9006643467257197, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980292792792793, "tokens_p.mean_in_band": 0.6193576388888888, "tokens_rate.above_band": 0.9866666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013333333333333334 }, { "epoch": 1.675543246697912, "grad_norm": 75.08572573751955, "learning_rate": 3.7152639743245156e-07, "loss": 0.4514, "step": 7865, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8770053475935828, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9095688748685594, "success_rate.epoch.env.math": 0.9720982142857143, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8343458890238963, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8654734714182779, "success_rate.epoch.global": 0.9005053695514845, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.86, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974023929471033, "tokens_p.mean_in_band": 0.65875, "tokens_rate.above_band": 0.9407582938388626, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05924170616113744 }, { "epoch": 1.6766084363016618, "grad_norm": 42.78193198434463, "learning_rate": 3.714936939590425e-07, "loss": 0.1738, "step": 7870, "success_rate.epoch.env.abd": 0.9859649122807017, "success_rate.epoch.env.agentgym:alfworld": 0.8776595744680851, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9097586568730325, "success_rate.epoch.env.math": 0.9721448467966574, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8337378640776699, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8655036564879732, "success_rate.epoch.global": 0.9003468937243772, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997995283018868, "tokens_p.mean_in_band": 0.7364783653846154, "tokens_rate.above_band": 0.9532374100719424, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046762589928057555 }, { "epoch": 1.677673625905411, "grad_norm": 76.61180309856198, "learning_rate": 3.714609821091119e-07, "loss": 0.2391, "step": 7875, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8776595744680851, "success_rate.epoch.env.agentgym:sciworld": 0.9707317073170731, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.909853249475891, "success_rate.epoch.env.math": 0.9721913236929922, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8336025848142165, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8655216870326985, "success_rate.epoch.global": 0.9003463476070529, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996846619576185, "tokens_p.mean_in_band": 0.6624348958333334, "tokens_rate.above_band": 0.9969818913480886, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0030181086519114686 }, { "epoch": 1.6787388155091607, "grad_norm": 63.07588982984957, "learning_rate": 3.7142826190649993e-07, "loss": 0.4093, "step": 7880, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8789473684210526, "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.909853249475891, "success_rate.epoch.env.math": 0.9722222222222222, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8338709677419355, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8656788828963904, "success_rate.epoch.global": 0.9004873447571137, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986702127659575, "tokens_p.mean_in_band": 0.8203125, "tokens_rate.above_band": 0.9939577039274925, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006042296072507553 }, { "epoch": 1.67980400511291, "grad_norm": 93.0766110963747, "learning_rate": 3.713955333750528e-07, "loss": 0.2856, "step": 7885, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8789473684210526, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9527027027027027, "success_rate.epoch.env.logic": 0.909853249475891, "success_rate.epoch.env.math": 0.9722376457523598, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.833869670152856, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8657936221514113, "success_rate.epoch.global": 0.9004865798147857, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993898186889819, "tokens_p.mean_in_band": 0.6865234375, "tokens_rate.above_band": 0.9972183588317107, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0027816411682892906 }, { "epoch": 1.6808691947166596, "grad_norm": 190.316150312791, "learning_rate": 3.7136279653862284e-07, "loss": 0.2751, "step": 7890, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9527027027027027, "success_rate.epoch.env.logic": 0.910135841170324, "success_rate.epoch.env.math": 0.9722376457523598, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.833868378812199, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8658768115846098, "success_rate.epoch.global": 0.9004858172700204, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99755859375, "tokens_p.mean_in_band": 0.6453993055555556, "tokens_rate.above_band": 0.9660377358490566, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033962264150943396 }, { "epoch": 1.6819343843204089, "grad_norm": 131.79297529971197, "learning_rate": 3.713300514210684e-07, "loss": 0.2255, "step": 7895, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9530201342281879, "success_rate.epoch.env.logic": 0.9102296450939458, "success_rate.epoch.env.math": 0.9722530521642619, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8342010412494995, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8659458392480925, "success_rate.epoch.global": 0.9006259780907668, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996260683760684, "tokens_p.mean_in_band": 0.7464488636363636, "tokens_rate.above_band": 0.9906858594411516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009314140558848433 }, { "epoch": 1.6829995739241586, "grad_norm": 431.9338913833118, "learning_rate": 3.712972980462539e-07, "loss": 0.4689, "step": 7900, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.8802083333333334, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9530201342281879, "success_rate.epoch.env.logic": 0.9102296450939458, "success_rate.epoch.env.math": 0.9723145071982281, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8336665333866453, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8659642811051864, "success_rate.epoch.global": 0.90046875, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995788409703504, "tokens_p.mean_in_band": 0.540625, "tokens_rate.above_band": 0.9867021276595744, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013297872340425532 }, { "epoch": 1.684064763527908, "grad_norm": 112.17255266011806, "learning_rate": 3.712645364380498e-07, "loss": 0.214, "step": 7905, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.8808290155440415, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9094693028095734, "success_rate.epoch.env.math": 0.9723604201216142, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8337994406711946, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8659821250534745, "success_rate.epoch.global": 0.9004680187207488, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981060606060606, "tokens_p.mean_in_band": 0.49107142857142855, "tokens_rate.above_band": 0.9889589905362776, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011041009463722398 }, { "epoch": 1.6851299531316575, "grad_norm": 126.85344346925532, "learning_rate": 3.7123176662033244e-07, "loss": 0.4201, "step": 7910, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.8808290155440415, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9095634095634095, "success_rate.epoch.env.math": 0.9723909442297074, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8326026305300916, "success_rate.epoch.env.webshop": 0.9705882352941176, "success_rate.epoch.env_macro_mean": 0.8659656783551085, "success_rate.epoch.global": 0.9, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9980742296918768, "tokens_p.mean_in_band": 0.5420386904761905, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 1.686195142735407, "grad_norm": 144.61063935745233, "learning_rate": 3.7119898861698433e-07, "loss": 0.2579, "step": 7915, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9663461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9098445595854923, "success_rate.epoch.env.math": 0.9724214009928296, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8327359617682198, "success_rate.epoch.env.webshop": 0.9705882352941176, "success_rate.epoch.env_macro_mean": 0.8656419761625603, "success_rate.epoch.global": 0.9, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979748255234298, "tokens_p.mean_in_band": 0.441650390625, "tokens_rate.above_band": 0.9980099502487563, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001990049751243781 }, { "epoch": 1.6872603323391564, "grad_norm": 345.36578703149934, "learning_rate": 3.711662024518937e-07, "loss": 0.3189, "step": 7920, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9665071770334929, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9099378881987578, "success_rate.epoch.env.math": 0.9724517906336089, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8330683624801272, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.865774474188719, "success_rate.epoch.global": 0.9001552795031056, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987836826347305, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9940476190476191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005952380952380952 }, { "epoch": 1.6883255219429059, "grad_norm": 363.8245621848165, "learning_rate": 3.711334081489551e-07, "loss": 0.1861, "step": 7925, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9533333333333334, "success_rate.epoch.env.logic": 0.9099378881987578, "success_rate.epoch.env.math": 0.9724972497249725, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8333333333333334, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8658357518091786, "success_rate.epoch.global": 0.9003100775193799, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987458193979933, "tokens_p.mean_in_band": 0.740625, "tokens_rate.above_band": 0.9917081260364843, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008291873963515755 }, { "epoch": 1.6893907115466553, "grad_norm": 654.546833984525, "learning_rate": 3.711006057320686e-07, "loss": 0.5475, "step": 7930, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9533333333333334, "success_rate.epoch.env.logic": 0.9100310237849017, "success_rate.epoch.env.math": 0.9725576289791438, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8332673267326732, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8658437071036927, "success_rate.epoch.global": 0.9003095975232198, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9912014563106796, "tokens_p.mean_in_band": 0.675, "tokens_rate.above_band": 0.9537037037037037, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046296296296296294 }, { "epoch": 1.6904559011504048, "grad_norm": 32.725839028054885, "learning_rate": 3.710677952251404e-07, "loss": 0.1983, "step": 7935, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9533333333333334, "success_rate.epoch.env.logic": 0.9102167182662538, "success_rate.epoch.env.math": 0.9726027397260274, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8328063241106719, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8658227800678958, "success_rate.epoch.global": 0.9001545595054096, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951636904761905, "tokens_p.mean_in_band": 0.69296875, "tokens_rate.above_band": 0.8936170212765957, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10638297872340426 }, { "epoch": 1.6915210907541542, "grad_norm": 134.6269161042348, "learning_rate": 3.7103497665208255e-07, "loss": 0.1893, "step": 7940, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9444444444444444, "success_rate.epoch.env.ded": 0.9501661129568106, "success_rate.epoch.env.logic": 0.9104938271604939, "success_rate.epoch.env.math": 0.9726327312534209, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8329383886255924, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8655934754719575, "success_rate.epoch.global": 0.9001543209876544, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968370445344129, "tokens_p.mean_in_band": 0.3046875, "tokens_rate.above_band": 0.9959677419354839, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004032258064516129 }, { "epoch": 1.6925862803579037, "grad_norm": 148.10737098421555, "learning_rate": 3.7100215003681305e-07, "loss": 0.2395, "step": 7945, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9503311258278145, "success_rate.epoch.env.logic": 0.9105858170606372, "success_rate.epoch.env.math": 0.9726626571897211, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.832807570977918, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8657583936616832, "success_rate.epoch.global": 0.9001540832049306, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980636833046471, "tokens_p.mean_in_band": 0.6337890625, "tokens_rate.above_band": 0.9797639123102867, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02023608768971332 }, { "epoch": 1.6936514699616532, "grad_norm": 95.46044006950885, "learning_rate": 3.709693154032557e-07, "loss": 0.4849, "step": 7950, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.882051282051282, "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9504950495049505, "success_rate.epoch.env.logic": 0.9098360655737705, "success_rate.epoch.env.math": 0.9721615720524017, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8329393223010244, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.865726831976071, "success_rate.epoch.global": 0.9, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973572938689218, "tokens_p.mean_in_band": 0.545166015625, "tokens_rate.above_band": 0.967280163599182, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032719836400818 }, { "epoch": 1.6947166595654026, "grad_norm": 64.33283153312027, "learning_rate": 3.7093647277534005e-07, "loss": 0.2293, "step": 7955, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.882051282051282, "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9509803921568627, "success_rate.epoch.env.logic": 0.9100204498977505, "success_rate.epoch.env.math": 0.9721919302071974, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8326771653543307, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8658387937013114, "success_rate.epoch.global": 0.9, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994565217391305, "tokens_p.mean_in_band": 0.337890625, "tokens_rate.above_band": 0.9971098265895953, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002890173410404624 }, { "epoch": 1.695781849169152, "grad_norm": 115.02976587760558, "learning_rate": 3.7090362217700165e-07, "loss": 0.2314, "step": 7960, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9509803921568627, "success_rate.epoch.env.logic": 0.910295616717635, "success_rate.epoch.env.math": 0.9722524483133841, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8328088119590873, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8659359855834965, "success_rate.epoch.global": 0.9001533742331288, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996895032051282, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.6968470387729016, "grad_norm": 407.4669622444961, "learning_rate": 3.7087076363218177e-07, "loss": 0.2165, "step": 7965, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, "success_rate.epoch.env.agentgym:sciworld": 0.9669811320754716, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9509803921568627, "success_rate.epoch.env.logic": 0.9103869653767821, "success_rate.epoch.env.math": 0.9723127035830619, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8327444051825678, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8659439125973424, "success_rate.epoch.global": 0.9001531393568147, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9926658163265306, "tokens_p.mean_in_band": 0.621875, "tokens_rate.above_band": 0.9074074074074074, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09259259259259259 }, { "epoch": 1.697912228376651, "grad_norm": 88.46519403789512, "learning_rate": 3.708378971648275e-07, "loss": 0.2573, "step": 7970, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9511400651465798, "success_rate.epoch.env.logic": 0.9096446700507614, "success_rate.epoch.env.math": 0.9723427331887202, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8328100470957613, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8659959478231603, "success_rate.epoch.global": 0.9001529051987768, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992529880478087, "tokens_p.mean_in_band": 0.4921875, "tokens_rate.above_band": 0.984313725490196, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01568627450980392 }, { "epoch": 1.6989774179804005, "grad_norm": 463.94907514086873, "learning_rate": 3.708050227988916e-07, "loss": 0.2542, "step": 7975, "success_rate.epoch.env.abd": 0.9862542955326461, "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9512987012987013, "success_rate.epoch.env.logic": 0.9089989888776542, "success_rate.epoch.env.math": 0.9718462371413102, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8328756375049039, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8659168067614939, "success_rate.epoch.global": 0.9, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8833333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969574036511156, "tokens_p.mean_below_band": 1.7229467630386353e-08, "tokens_p.mean_in_band": 0.6917067307692307, "tokens_rate.above_band": 0.9723865877712031, "tokens_rate.below_band": 0.0019723865877712033, "tokens_rate.in_band": 0.02564102564102564 }, { "epoch": 1.70004260758415, "grad_norm": 396.01476407258144, "learning_rate": 3.707721405583328e-07, "loss": 0.5476, "step": 7980, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9512987012987013, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9718918918918918, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8330721003134797, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8656000730947819, "success_rate.epoch.global": 0.9, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994158878504673, "tokens_p.mean_in_band": 0.69140625, "tokens_rate.above_band": 0.9907407407407407, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009259259259259259 }, { "epoch": 1.7011077971878994, "grad_norm": 953.6407265468724, "learning_rate": 3.707392504671153e-07, "loss": 0.3446, "step": 7985, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9093655589123867, "success_rate.epoch.env.math": 0.9719373988127361, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8328112764291308, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8656678443188818, "success_rate.epoch.global": 0.9, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998078893442623, "tokens_p.mean_in_band": 0.5026041666666666, "tokens_rate.above_band": 0.9938900203665988, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006109979633401222 }, { "epoch": 1.7021729867916489, "grad_norm": 51.83978408833236, "learning_rate": 3.707063525492093e-07, "loss": 0.1982, "step": 7990, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9094567404426559, "success_rate.epoch.env.math": 0.9714593430263866, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8330727130570759, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8653774214521952, "success_rate.epoch.global": 0.8998480243161094, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9879907024793388, "tokens_p.mean_in_band": 0.7586379278273809, "tokens_rate.above_band": 0.852112676056338, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14788732394366197 }, { "epoch": 1.7032381763953985, "grad_norm": 179.74604009249427, "learning_rate": 3.706734468285905e-07, "loss": 0.3836, "step": 7995, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9095477386934674, "success_rate.epoch.env.math": 0.9715359828141783, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8333333333333334, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8654163540262734, "success_rate.epoch.global": 0.9, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9943524096385542, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9540229885057471, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04597701149425287 }, { "epoch": 1.7043033659991478, "grad_norm": 133.6542587357131, "learning_rate": 3.7064053332924024e-07, "loss": 0.1729, "step": 8000, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8793969849246231, "success_rate.epoch.env.agentgym:sciworld": 0.9674418604651163, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9097291875626881, "success_rate.epoch.env.math": 0.9715512614063339, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8336579664978574, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8655329547173274, "success_rate.epoch.global": 0.9001515151515151, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993083501006036, "tokens_p.mean_in_band": 0.783203125, "tokens_rate.above_band": 0.9841584158415841, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015841584158415842 }, { "epoch": 1.7053685556028975, "grad_norm": 72.77279599004258, "learning_rate": 3.706076120751459e-07, "loss": 0.2909, "step": 8005, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9674418604651163, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9099099099099099, "success_rate.epoch.env.math": 0.9715817694369974, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8339813374805599, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8656363743933241, "success_rate.epoch.global": 0.900302571860817, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9954690831556503, "tokens_p.mean_in_band": 0.767578125, "tokens_rate.above_band": 0.9791231732776617, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020876826722338204 }, { "epoch": 1.7064337452066467, "grad_norm": 52.66562918063131, "learning_rate": 3.705746830903e-07, "loss": 0.1271, "step": 8010, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9099099099099099, "success_rate.epoch.env.math": 0.971627408993576, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8342391304347826, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8657077395371452, "success_rate.epoch.global": 0.9004531722054381, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987855007473841, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9955357142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004464285714285714 }, { "epoch": 1.7074989348103964, "grad_norm": 77.66567174897885, "learning_rate": 3.705417463987011e-07, "loss": 0.3153, "step": 8015, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9716577540106952, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8337853545137544, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8655990414717789, "success_rate.epoch.global": 0.9001508295625943, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.775, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0003375771604939, "tokens_p.mean_in_band": 0.534148185483871, "tokens_rate.above_band": 0.9543446244477173, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045655375552282766 }, { "epoch": 1.7085641244141456, "grad_norm": 639.6188200208629, "learning_rate": 3.705088020243532e-07, "loss": 0.4426, "step": 8020, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9488817891373802, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9717031500266952, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8336557059961315, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656845113229089, "success_rate.epoch.global": 0.9001506024096385, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977034120734908, "tokens_p.mean_in_band": 0.734375, "tokens_rate.above_band": 0.9857697283311773, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014230271668822769 }, { "epoch": 1.7096293140178953, "grad_norm": 79.15637329562013, "learning_rate": 3.7047584999126587e-07, "loss": 0.2122, "step": 8025, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9490445859872612, "success_rate.epoch.env.logic": 0.9093625498007968, "success_rate.epoch.env.math": 0.9712306872669153, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8337843061461152, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.86569274540927, "success_rate.epoch.global": 0.9001503759398496, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975694444444444, "tokens_p.mean_in_band": 0.79296875, "tokens_rate.above_band": 0.9761388286334056, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02386117136659436 }, { "epoch": 1.7106945036216445, "grad_norm": 67.31530185294233, "learning_rate": 3.7044289032345433e-07, "loss": 0.2967, "step": 8030, "success_rate.epoch.env.abd": 0.9864406779661017, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9490445859872612, "success_rate.epoch.env.logic": 0.9093625498007968, "success_rate.epoch.env.math": 0.9712765957446808, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8337191358024691, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656994084304815, "success_rate.epoch.global": 0.9001501501501501, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955645161290323, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9281437125748503, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0718562874251497 }, { "epoch": 1.7117596932253942, "grad_norm": 142.3132195378032, "learning_rate": 3.704099230449394e-07, "loss": 0.2215, "step": 8035, "success_rate.epoch.env.abd": 0.9865319865319865, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9492063492063492, "success_rate.epoch.env.logic": 0.9093625498007968, "success_rate.epoch.env.math": 0.9712918660287081, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8336542164035425, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8657313534707618, "success_rate.epoch.global": 0.9001499250374813, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999498820754717, "tokens_p.mean_in_band": 0.5667613636363636, "tokens_rate.above_band": 0.996552804763397, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0034471952366029457 }, { "epoch": 1.7128248828291435, "grad_norm": 88.10647320103408, "learning_rate": 3.703769481797474e-07, "loss": 0.2556, "step": 8040, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9492063492063492, "success_rate.epoch.env.logic": 0.9086395233366436, "success_rate.epoch.env.math": 0.9713375796178344, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8338461538461538, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656913370383665, "success_rate.epoch.global": 0.9001497005988024, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961387973640856, "tokens_p.mean_in_band": 0.45703125, "tokens_rate.above_band": 0.9499217527386542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050078247261345854 }, { "epoch": 1.7138900724328932, "grad_norm": 402.29852730174576, "learning_rate": 3.703439657519101e-07, "loss": 0.239, "step": 8045, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.908820614469772, "success_rate.epoch.env.math": 0.9713831478537361, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8336534767575874, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8657549526917083, "success_rate.epoch.global": 0.9001494768310911, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995259626604434, "tokens_p.mean_in_band": 0.701171875, "tokens_rate.above_band": 0.9976717112922002, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002328288707799767 }, { "epoch": 1.7149552620366424, "grad_norm": 40.620780802998205, "learning_rate": 3.7031097578546485e-07, "loss": 0.3006, "step": 8050, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.908820614469772, "success_rate.epoch.env.math": 0.9714285714285714, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8337164750957854, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8657648092292568, "success_rate.epoch.global": 0.9001492537313432, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9889705882352942, "tokens_p.mean_in_band": 0.5950520833333334, "tokens_rate.above_band": 0.9340659340659341, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06593406593406594 }, { "epoch": 1.716020451640392, "grad_norm": 62.86038435203946, "learning_rate": 3.702779783044546e-07, "loss": 0.2795, "step": 8055, "success_rate.epoch.env.abd": 0.9866220735785953, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9080118694362018, "success_rate.epoch.env.math": 0.9714889123548046, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8339073861461921, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8657182091668577, "success_rate.epoch.global": 0.9001490312965723, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0010758998435054, "tokens_p.mean_in_band": 0.5855263157894737, "tokens_rate.above_band": 0.9711246200607903, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028875379939209727 }, { "epoch": 1.7170856412441413, "grad_norm": 221.09680667015974, "learning_rate": 3.7024497333292757e-07, "loss": 0.3113, "step": 8060, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9072063178677197, "success_rate.epoch.env.math": 0.9715789473684211, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8339709257842387, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656629924551534, "success_rate.epoch.global": 0.9001488095238095, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0004098360655738, "tokens_p.mean_in_band": 0.4609375, "tokens_rate.above_band": 0.9744408945686901, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025559105431309903 }, { "epoch": 1.718150830847891, "grad_norm": 63.3426700401906, "learning_rate": 3.702119608949377e-07, "loss": 0.3124, "step": 8065, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9072063178677197, "success_rate.epoch.env.math": 0.9711286089238845, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8341612533435231, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8652973208985284, "success_rate.epoch.global": 0.9, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992666967509025, "tokens_p.mean_below_band": 1.0477378964424133e-08, "tokens_p.mean_in_band": 0.6213235294117647, "tokens_rate.above_band": 0.9685314685314685, "tokens_rate.below_band": 0.0017482517482517483, "tokens_rate.in_band": 0.02972027972027972 }, { "epoch": 1.7192160204516402, "grad_norm": 99.12312219549374, "learning_rate": 3.7017894101454405e-07, "loss": 0.2504, "step": 8070, "success_rate.epoch.env.abd": 0.9867109634551495, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9495268138801262, "success_rate.epoch.env.logic": 0.9075712881022615, "success_rate.epoch.env.math": 0.9711437565582371, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.834351145038168, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8653676874098515, "success_rate.epoch.global": 0.9001483679525223, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993327402135231, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.72028121005539, "grad_norm": 146.81034238962428, "learning_rate": 3.701459137158113e-07, "loss": 0.3457, "step": 8075, "success_rate.epoch.env.abd": 0.9867549668874173, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9495268138801262, "success_rate.epoch.env.logic": 0.9076620825147348, "success_rate.epoch.env.math": 0.9711891042430592, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8342857142857143, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8653781160259523, "success_rate.epoch.global": 0.9001481481481481, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992879746835443, "tokens_p.mean_in_band": 0.5622209821428571, "tokens_rate.above_band": 0.9575757575757575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04242424242424243 }, { "epoch": 1.7213463996591394, "grad_norm": 84.3736690260916, "learning_rate": 3.701128790228096e-07, "loss": 0.3187, "step": 8080, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9495268138801262, "success_rate.epoch.env.logic": 0.9059745347698335, "success_rate.epoch.env.math": 0.9712192569335426, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8345378470901483, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8649543088166417, "success_rate.epoch.global": 0.8998520710059171, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9911406911142454, "tokens_p.mean_in_band": 0.24870102611940298, "tokens_rate.above_band": 0.17467356491746736, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.8253264350825327 }, { "epoch": 1.7224115892628888, "grad_norm": 51.691261578257524, "learning_rate": 3.700798369596143e-07, "loss": 0.1247, "step": 8085, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9498432601880877, "success_rate.epoch.env.logic": 0.9059745347698335, "success_rate.epoch.env.math": 0.9712493465760585, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8345351043643264, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8649855627461558, "success_rate.epoch.global": 0.8998522895125554, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966142590286425, "tokens_p.mean_in_band": 0.5130208333333334, "tokens_rate.above_band": 0.992583436341162, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007416563658838072 }, { "epoch": 1.7234767788666383, "grad_norm": 55.81938843436817, "learning_rate": 3.700467875503063e-07, "loss": 0.5246, "step": 8090, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9498432601880877, "success_rate.epoch.env.logic": 0.9060665362035225, "success_rate.epoch.env.math": 0.9712643678160919, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8345950037850114, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8650140667023277, "success_rate.epoch.global": 0.8998525073746313, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944852941176471, "tokens_p.mean_in_band": 0.6853693181818182, "tokens_rate.above_band": 0.9392265193370166, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06077348066298342 }, { "epoch": 1.7245419684703878, "grad_norm": 225.01972540603396, "learning_rate": 3.700137308189717e-07, "loss": 0.452, "step": 8095, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9498432601880877, "success_rate.epoch.env.logic": 0.9060665362035225, "success_rate.epoch.env.math": 0.9713242961418144, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.834214501510574, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.864984923616081, "success_rate.epoch.global": 0.8997054491899853, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9908854166666666, "tokens_p.mean_in_band": 0.48974609375, "tokens_rate.above_band": 0.9230769230769231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07692307692307693 }, { "epoch": 1.7256071580741372, "grad_norm": 44.033781178941474, "learning_rate": 3.6998066678970207e-07, "loss": 0.4498, "step": 8100, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9060665362035225, "success_rate.epoch.env.math": 0.9713839750260146, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8343396226415094, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8651806233026696, "success_rate.epoch.global": 0.8998529411764706, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993523316062176, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7266723476778867, "grad_norm": 150.84605281193748, "learning_rate": 3.699475954865942e-07, "loss": 0.2065, "step": 8105, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9060665362035225, "success_rate.epoch.env.math": 0.9714285714285714, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8347760632292058, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8652243539381472, "success_rate.epoch.global": 0.9, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9918893129770993, "tokens_p.mean_in_band": 0.806640625, "tokens_rate.above_band": 0.9703703703703703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02962962962962963 }, { "epoch": 1.7277375372816361, "grad_norm": 149.27970364787186, "learning_rate": 3.699145169337502e-07, "loss": 0.1931, "step": 8110, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.906158357771261, "success_rate.epoch.env.math": 0.9714730290456431, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8350864012021036, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8652791157924731, "success_rate.epoch.global": 0.9001466275659824, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969855305466238, "tokens_p.mean_in_band": 0.833984375, "tokens_rate.above_band": 0.9936102236421726, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006389776357827476 }, { "epoch": 1.7288027268853856, "grad_norm": 82.62645146205433, "learning_rate": 3.6988143115527753e-07, "loss": 0.2641, "step": 8115, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9459459459459459, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.90625, "success_rate.epoch.env.math": 0.9715025906735751, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8350824587706147, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8653435937594192, "success_rate.epoch.global": 0.9001464128843338, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969579646017699, "tokens_p.mean_in_band": 0.740234375, "tokens_rate.above_band": 0.9713467048710601, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02865329512893983 }, { "epoch": 1.729867916489135, "grad_norm": 48.766759075903, "learning_rate": 3.698483381752888e-07, "loss": 0.3593, "step": 8120, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9063414634146342, "success_rate.epoch.env.math": 0.9715468184169684, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8349550898203593, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8654868742881383, "success_rate.epoch.global": 0.9001461988304094, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930132113821138, "tokens_p.mean_in_band": 0.65966796875, "tokens_rate.above_band": 0.968503937007874, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031496062992125984 }, { "epoch": 1.7309331060928845, "grad_norm": 88.84159104133133, "learning_rate": 3.6981523801790204e-07, "loss": 0.3669, "step": 8125, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9064327485380117, "success_rate.epoch.env.math": 0.9716055756324212, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8352633545013074, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8655285385626637, "success_rate.epoch.global": 0.9002919708029197, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959846368715084, "tokens_p.mean_in_band": 0.775390625, "tokens_rate.above_band": 0.9675675675675676, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032432432432432434 }, { "epoch": 1.731998295696634, "grad_norm": 136.23742159989214, "learning_rate": 3.697821307072403e-07, "loss": 0.4581, "step": 8130, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9066147859922179, "success_rate.epoch.env.math": 0.9716786817713697, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8350746268656717, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8655345763769836, "success_rate.epoch.global": 0.9002915451895044, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938118811881188, "tokens_p.mean_in_band": 0.6651785714285714, "tokens_rate.above_band": 0.9351851851851852, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06481481481481481 }, { "epoch": 1.7330634853003835, "grad_norm": 156.7926497364516, "learning_rate": 3.6974901626743203e-07, "loss": 0.4604, "step": 8135, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9683257918552036, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9057337220602527, "success_rate.epoch.env.math": 0.9716932578486875, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8346984363365599, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8654396288345826, "success_rate.epoch.global": 0.9, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982938218390804, "tokens_p.mean_in_band": 0.4948466532939189, "tokens_rate.above_band": 0.96577243293247, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034227567067530065 }, { "epoch": 1.734128674904133, "grad_norm": 576.5655291026226, "learning_rate": 3.697158947226108e-07, "loss": 0.3873, "step": 8140, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, "success_rate.epoch.env.agentgym:sciworld": 0.9683257918552036, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9501557632398754, "success_rate.epoch.env.logic": 0.9058252427184466, "success_rate.epoch.env.math": 0.9717368961973278, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8348828560803273, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656270449410545, "success_rate.epoch.global": 0.9001453488372093, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995508982035928, "tokens_p.mean_in_band": 0.8035714285714286, "tokens_rate.above_band": 0.9916864608076009, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00831353919239905 }, { "epoch": 1.7351938645078824, "grad_norm": 106.07720845603625, "learning_rate": 3.696827660969152e-07, "loss": 0.3657, "step": 8145, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, "success_rate.epoch.env.agentgym:sciworld": 0.9683257918552036, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9058252427184466, "success_rate.epoch.env.math": 0.9717948717948718, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8350668647845468, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8654156580001827, "success_rate.epoch.global": 0.9001451378809869, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991685144124168, "tokens_p.mean_in_band": 0.7621527777777778, "tokens_rate.above_band": 0.9616204690831557, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03837953091684435 }, { "epoch": 1.7362590541116318, "grad_norm": 98.6683963210045, "learning_rate": 3.6964963041448934e-07, "loss": 0.2948, "step": 8150, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, "success_rate.epoch.env.agentgym:sciworld": 0.9683257918552036, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9059165858389913, "success_rate.epoch.env.math": 0.9713408393039918, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8350018539117539, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8653767761598984, "success_rate.epoch.global": 0.9, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9912383177570093, "tokens_p.mean_in_band": 0.71640625, "tokens_rate.above_band": 0.8770491803278688, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12295081967213115 }, { "epoch": 1.7373242437153813, "grad_norm": 72.99932873016495, "learning_rate": 3.6961648769948215e-07, "loss": 0.3644, "step": 8155, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.968609865470852, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9060077519379846, "success_rate.epoch.env.math": 0.9713554987212276, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8352462051092188, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656089019623913, "success_rate.epoch.global": 0.9001447178002895, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972036891679749, "tokens_p.mean_in_band": 0.76171875, "tokens_rate.above_band": 0.9937597503900156, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0062402496099844 }, { "epoch": 1.7383894333191308, "grad_norm": 122.15960408016022, "learning_rate": 3.6958333797604786e-07, "loss": 0.2652, "step": 8160, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.968609865470852, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9062801932367149, "success_rate.epoch.env.math": 0.9714139867279225, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8354289940828402, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656556036241227, "success_rate.epoch.global": 0.9002890173410405, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956018518518519, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7394546229228802, "grad_norm": 81.438866539525, "learning_rate": 3.6955018126834564e-07, "loss": 0.3715, "step": 8165, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.968609865470852, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9062801932367149, "success_rate.epoch.env.math": 0.9709628120224146, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8353636028054633, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656226282873714, "success_rate.epoch.global": 0.9001443001443001, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984497389033943, "tokens_p.mean_in_band": 0.4299879807692308, "tokens_rate.above_band": 0.9671717171717171, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03282828282828283 }, { "epoch": 1.74051981252663, "grad_norm": 14.69828298801578, "learning_rate": 3.6951701760054003e-07, "loss": 0.348, "step": 8170, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9062801932367149, "success_rate.epoch.env.math": 0.9709775967413442, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8350515463917526, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8656596346109814, "success_rate.epoch.global": 0.9, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981560559006211, "tokens_p.mean_in_band": 0.56953125, "tokens_rate.above_band": 0.9847094801223242, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01529051987767584 }, { "epoch": 1.7415850021303791, "grad_norm": 253.472915473384, "learning_rate": 3.694838469968003e-07, "loss": 0.2324, "step": 8175, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, "success_rate.epoch.env.agentgym:sciworld": 0.9688888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9064609450337512, "success_rate.epoch.env.math": 0.971021860701576, "success_rate.epoch.env.sat": 0.12195121951219512, "success_rate.epoch.env.science": 0.835233541743288, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8654321000209712, "success_rate.epoch.global": 0.9, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992917847025495, "tokens_p.mean_in_band": 0.6957720588235294, "tokens_rate.above_band": 0.9764868603042877, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02351313969571231 }, { "epoch": 1.7426501917341288, "grad_norm": 111.48827945199903, "learning_rate": 3.694506694813011e-07, "loss": 0.3127, "step": 8180, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, "success_rate.epoch.env.agentgym:sciworld": 0.9688888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9064609450337512, "success_rate.epoch.env.math": 0.9710659898477157, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8355963302752294, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8673696310230646, "success_rate.epoch.global": 0.9001436781609196, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9910714285714286, "tokens_p.mean_in_band": 0.8018973214285714, "tokens_rate.above_band": 0.9565217391304348, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043478260869565216 }, { "epoch": 1.743715381337878, "grad_norm": 125.7077269336235, "learning_rate": 3.694174850782219e-07, "loss": 0.3711, "step": 8185, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9065510597302505, "success_rate.epoch.env.math": 0.9711246200607903, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8354105571847508, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8673911836162392, "success_rate.epoch.global": 0.9001434720229555, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977985395189003, "tokens_p.mean_in_band": 0.5066964285714286, "tokens_rate.above_band": 0.9881154499151104, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011884550084889643 }, { "epoch": 1.7447805709416278, "grad_norm": 31.7327380812736, "learning_rate": 3.6938429381174725e-07, "loss": 0.2015, "step": 8190, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9504643962848297, "success_rate.epoch.env.logic": 0.9056785370548605, "success_rate.epoch.env.math": 0.9711684370257967, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8357116721551409, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8673940284599115, "success_rate.epoch.global": 0.9001432664756447, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951923076923077, "tokens_p.mean_in_band": 0.7449776785714286, "tokens_rate.above_band": 0.9852631578947368, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014736842105263158 }, { "epoch": 1.745845760545377, "grad_norm": 105.58094073057569, "learning_rate": 3.6935109570606666e-07, "loss": 0.2123, "step": 8195, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9507692307692308, "success_rate.epoch.env.logic": 0.9057692307692308, "success_rate.epoch.env.math": 0.9711830131445905, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8360116873630388, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.86747088023671, "success_rate.epoch.global": 0.9002861230329041, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974264705882353, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9956076134699854, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004392386530014641 }, { "epoch": 1.7469109501491267, "grad_norm": 79.84535168853331, "learning_rate": 3.6931789078537477e-07, "loss": 0.3843, "step": 8200, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9049904030710173, "success_rate.epoch.env.math": 0.9712266532054518, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8357664233576643, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8692212628545971, "success_rate.epoch.global": 0.9001428571428571, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987058723693143, "tokens_p.mean_in_band": 0.6809895833333334, "tokens_rate.above_band": 0.9839679358717435, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01603206412825651 }, { "epoch": 1.747976139752876, "grad_norm": 164.7820734171724, "learning_rate": 3.69284679073871e-07, "loss": 0.3149, "step": 8205, "success_rate.epoch.env.abd": 0.9836601307189542, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9050814956855225, "success_rate.epoch.env.math": 0.9712846347607053, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8358862144420132, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8692975264077076, "success_rate.epoch.global": 0.9002853067047075, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957720588235294, "tokens_p.mean_in_band": 0.7265625, "tokens_rate.above_band": 0.9826589595375722, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017341040462427744 }, { "epoch": 1.7490413293566256, "grad_norm": 140.12892335407128, "learning_rate": 3.692514605957599e-07, "loss": 0.2303, "step": 8210, "success_rate.epoch.env.abd": 0.9837133550488599, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9050814956855225, "success_rate.epoch.env.math": 0.9713135379969804, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8363041105856676, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8693429831086018, "success_rate.epoch.global": 0.9004273504273504, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919064748201439, "tokens_p.mean_in_band": 0.720703125, "tokens_rate.above_band": 0.9586206896551724, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041379310344827586 }, { "epoch": 1.7501065189603748, "grad_norm": 51.03269048770925, "learning_rate": 3.692182353752507e-07, "loss": 0.355, "step": 8215, "success_rate.epoch.env.abd": 0.9837662337662337, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9052631578947369, "success_rate.epoch.env.math": 0.971356783919598, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.83617871413004, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8693568367807452, "success_rate.epoch.global": 0.9004267425320057, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974264705882353, "tokens_p.mean_in_band": 0.2421875, "tokens_rate.above_band": 0.9770114942528736, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022988505747126436 }, { "epoch": 1.7511717085641245, "grad_norm": 33.93199028743441, "learning_rate": 3.691850034365579e-07, "loss": 0.1626, "step": 8220, "success_rate.epoch.env.abd": 0.9838187702265372, "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9055343511450382, "success_rate.epoch.env.math": 0.9713855421686747, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8363570391872278, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8690286198370384, "success_rate.epoch.global": 0.9004261363636363, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961586378737541, "tokens_p.mean_in_band": 0.721875, "tokens_rate.above_band": 0.9836601307189542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016339869281045753 }, { "epoch": 1.7522368981678738, "grad_norm": 53.372944742560236, "learning_rate": 3.6915176480390053e-07, "loss": 0.127, "step": 8225, "success_rate.epoch.env.abd": 0.9838187702265372, "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, "success_rate.epoch.env.agentgym:sciworld": 0.9694323144104804, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9510703363914373, "success_rate.epoch.env.logic": 0.9057142857142857, "success_rate.epoch.env.math": 0.9714285714285714, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8358695652173913, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.869016761533016, "success_rate.epoch.global": 0.9002836879432624, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945987654320988, "tokens_p.mean_in_band": 0.6509650735294118, "tokens_rate.above_band": 0.9050279329608939, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09497206703910614 }, { "epoch": 1.7533020877716234, "grad_norm": 98.14570350236706, "learning_rate": 3.6911851950150273e-07, "loss": 0.2419, "step": 8230, "success_rate.epoch.env.abd": 0.9838187702265372, "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, "success_rate.epoch.env.agentgym:sciworld": 0.9694323144104804, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9050332383665717, "success_rate.epoch.env.math": 0.971457185778668, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.835988414192614, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.8690834540807703, "success_rate.epoch.global": 0.9002832861189801, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987224842767296, "tokens_p.mean_in_band": 0.7760416666666666, "tokens_rate.above_band": 0.9906542056074766, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009345794392523364 }, { "epoch": 1.7543672773753727, "grad_norm": 196.41016934536862, "learning_rate": 3.690852675535935e-07, "loss": 0.2736, "step": 8235, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8785046728971962, "success_rate.epoch.env.agentgym:sciworld": 0.9694323144104804, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9041745730550285, "success_rate.epoch.env.math": 0.9714857428714357, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8362847849656668, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.8690915323231703, "success_rate.epoch.global": 0.9002828854314003, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997014146567718, "tokens_p.mean_in_band": 0.6876446759259259, "tokens_rate.above_band": 0.9522968197879859, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04770318021201413 }, { "epoch": 1.7554324669791224, "grad_norm": 83.57473081650586, "learning_rate": 3.6905200898440657e-07, "loss": 0.2746, "step": 8240, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9042654028436019, "success_rate.epoch.env.math": 0.9715284715284715, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8364620938628159, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.8691967263075466, "success_rate.epoch.global": 0.9004237288135594, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964480048367593, "tokens_p.mean_in_band": 0.8253348214285714, "tokens_rate.above_band": 0.9916067146282974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008393285371702638 }, { "epoch": 1.7564976565828716, "grad_norm": 49.97892403907451, "learning_rate": 3.690187438181805e-07, "loss": 0.2496, "step": 8245, "success_rate.epoch.env.abd": 0.9839228295819936, "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9046270066100094, "success_rate.epoch.env.math": 0.9715284715284715, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8359177785791562, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8692550834229557, "success_rate.epoch.global": 0.9002820874471086, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998747723132969, "tokens_p.mean_in_band": 0.6010044642857143, "tokens_rate.above_band": 0.987410071942446, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012589928057553957 }, { "epoch": 1.7575628461866213, "grad_norm": 114.47787118059497, "learning_rate": 3.6898547207915873e-07, "loss": 0.2586, "step": 8250, "success_rate.epoch.env.abd": 0.9839228295819936, "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9047169811320754, "success_rate.epoch.env.math": 0.9710578842315369, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8359712230215828, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8692253408472791, "success_rate.epoch.global": 0.9001408450704226, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939338235294117, "tokens_p.mean_in_band": 0.489375, "tokens_rate.above_band": 0.7727272727272727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.22727272727272727 }, { "epoch": 1.7586280357903705, "grad_norm": 88.74631440721146, "learning_rate": 3.6895219379158955e-07, "loss": 0.3547, "step": 8255, "success_rate.epoch.env.abd": 0.9839743589743589, "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9038642789820923, "success_rate.epoch.env.math": 0.9711011459890384, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8358477011494253, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8691961071486877, "success_rate.epoch.global": 0.9, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9997195512820513, "tokens_p.mean_in_band": 0.6153927364864865, "tokens_rate.above_band": 0.9547123623011016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04528763769889841 }, { "epoch": 1.7596932253941202, "grad_norm": 58.862840691141415, "learning_rate": 3.689189089797258e-07, "loss": 0.3514, "step": 8260, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.903954802259887, "success_rate.epoch.env.math": 0.9706467661691542, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8360832137733142, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8691937189876385, "success_rate.epoch.global": 0.9, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946446572580645, "tokens_p.mean_in_band": 0.7691127232142857, "tokens_rate.above_band": 0.9860834990059643, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013916500994035786 }, { "epoch": 1.7607584149978697, "grad_norm": 325.74195709203667, "learning_rate": 3.6888561766782534e-07, "loss": 0.3825, "step": 8265, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9040451552210724, "success_rate.epoch.env.math": 0.9707196029776675, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8363180515759312, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8692299033123945, "success_rate.epoch.global": 0.900140252454418, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.7176649305555556, "tokens_rate.above_band": 0.898876404494382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10112359550561797 }, { "epoch": 1.7618236046016191, "grad_norm": 139.83387070271536, "learning_rate": 3.688523198801505e-07, "loss": 0.2471, "step": 8270, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, "success_rate.epoch.env.agentgym:sciworld": 0.9698275862068966, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9033771106941839, "success_rate.epoch.env.math": 0.9707486365889936, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.836552217453505, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8692049734462071, "success_rate.epoch.global": 0.900140056022409, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941737288135594, "tokens_p.mean_in_band": 0.73828125, "tokens_rate.above_band": 0.9609120521172638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03908794788273615 }, { "epoch": 1.7628887942053686, "grad_norm": 174.87619621200622, "learning_rate": 3.6881901564096864e-07, "loss": 0.2019, "step": 8275, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.880184331797235, "success_rate.epoch.env.agentgym:sciworld": 0.9699570815450643, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9034676663542643, "success_rate.epoch.env.math": 0.9707776126795443, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8368439842913246, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8693045640002275, "success_rate.epoch.global": 0.9002797202797203, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976500659630607, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.763953983809118, "grad_norm": 477.69093397229483, "learning_rate": 3.6878570497455147e-07, "loss": 0.3575, "step": 8280, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.880184331797235, "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9513677811550152, "success_rate.epoch.env.logic": 0.9034676663542643, "success_rate.epoch.env.math": 0.9703557312252964, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.837018544935806, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.869305324327738, "success_rate.epoch.global": 0.9002793296089385, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985733695652174, "tokens_p.mean_in_band": 0.61328125, "tokens_rate.above_band": 0.9913793103448276, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008620689655172414 }, { "epoch": 1.7650191734128675, "grad_norm": 115.24093305979926, "learning_rate": 3.687523879051757e-07, "loss": 0.1575, "step": 8285, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8807339449541285, "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9516616314199395, "success_rate.epoch.env.logic": 0.903558052434457, "success_rate.epoch.env.math": 0.9704142011834319, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8370766488413547, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8690644729443001, "success_rate.epoch.global": 0.900278940027894, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984327983951855, "tokens_p.mean_in_band": 0.7261029411764706, "tokens_rate.above_band": 0.9832347140039448, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016765285996055226 }, { "epoch": 1.766084363016617, "grad_norm": 150.02177723714118, "learning_rate": 3.687190644571225e-07, "loss": 0.4813, "step": 8290, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8807339449541285, "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9519519519519519, "success_rate.epoch.env.logic": 0.9036482694106641, "success_rate.epoch.env.math": 0.9704724409448819, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8368945868945868, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8690878107009276, "success_rate.epoch.global": 0.9002785515320334, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998146186440678, "tokens_p.mean_in_band": 0.4661458333333333, "tokens_rate.above_band": 0.963265306122449, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036734693877551024 }, { "epoch": 1.7671495526203664, "grad_norm": 120.31289882633322, "learning_rate": 3.686857346546778e-07, "loss": 0.4033, "step": 8295, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8767123287671232, "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9519519519519519, "success_rate.epoch.env.logic": 0.9039179104477612, "success_rate.epoch.env.math": 0.9705304518664047, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8370106761565836, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8687625493403469, "success_rate.epoch.global": 0.9002781641168289, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997359154929577, "tokens_p.mean_in_band": 0.5966796875, "tokens_rate.above_band": 0.9943977591036415, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0056022408963585435 }, { "epoch": 1.768214742224116, "grad_norm": 129.2118145999637, "learning_rate": 3.686523985221321e-07, "loss": 0.3394, "step": 8300, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8772727272727273, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9520958083832335, "success_rate.epoch.env.logic": 0.9030754892823858, "success_rate.epoch.env.math": 0.9705738106915155, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8371845005332386, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8687812069827718, "success_rate.epoch.global": 0.9002777777777777, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000803755144033, "tokens_p.mean_in_band": 0.6566611842105263, "tokens_rate.above_band": 0.9808274470232089, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01917255297679112 }, { "epoch": 1.7692799318278654, "grad_norm": 74.52794628196288, "learning_rate": 3.686190560837805e-07, "loss": 0.2688, "step": 8305, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9522388059701492, "success_rate.epoch.env.logic": 0.9032558139534884, "success_rate.epoch.env.math": 0.9705882352941176, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8374156904508342, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8685239418354412, "success_rate.epoch.global": 0.9002773925104022, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989945302445302, "tokens_p.mean_in_band": 0.7467830882352942, "tokens_rate.above_band": 0.9785894206549118, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021410579345088162 }, { "epoch": 1.7703451214316148, "grad_norm": 87.47326385592673, "learning_rate": 3.685857073639228e-07, "loss": 0.2358, "step": 8310, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.903435468895079, "success_rate.epoch.env.math": 0.9706026457618814, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.837646224742999, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8685914372071264, "success_rate.epoch.global": 0.900415512465374, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.000840227507756, "tokens_p.mean_in_band": 0.892578125, "tokens_rate.above_band": 0.9979360165118679, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0020639834881320948 }, { "epoch": 1.7714103110353643, "grad_norm": 124.70343574185186, "learning_rate": 3.6855235238686325e-07, "loss": 0.423, "step": 8315, "success_rate.epoch.env.abd": 0.9841772151898734, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9525222551928784, "success_rate.epoch.env.logic": 0.9035250463821892, "success_rate.epoch.env.math": 0.9706314243759178, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.83793347487615, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8686457226715912, "success_rate.epoch.global": 0.9005532503457815, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953648325358851, "tokens_p.mean_in_band": 0.7083333333333334, "tokens_rate.above_band": 0.9720930232558139, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027906976744186046 }, { "epoch": 1.7724755006391137, "grad_norm": 202.3068292737047, "learning_rate": 3.685189911769108e-07, "loss": 0.3005, "step": 8320, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9525222551928784, "success_rate.epoch.env.logic": 0.9037927844588344, "success_rate.epoch.env.math": 0.9706888128969223, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8379908029713478, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8686895380409296, "success_rate.epoch.global": 0.9006906077348066, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996919014084507, "tokens_p.mean_in_band": 0.759765625, "tokens_rate.above_band": 0.9861111111111112, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013888888888888888 }, { "epoch": 1.7735406902428632, "grad_norm": 25.208703512723403, "learning_rate": 3.684856237583787e-07, "loss": 0.2872, "step": 8325, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9525222551928784, "success_rate.epoch.env.logic": 0.9038817005545287, "success_rate.epoch.env.math": 0.9707602339181286, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8381625441696113, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.868758378803038, "success_rate.epoch.global": 0.9008275862068965, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973152920962199, "tokens_p.mean_in_band": 0.8736979166666666, "tokens_rate.above_band": 0.9948717948717949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005128205128205128 }, { "epoch": 1.7746058798466127, "grad_norm": 91.98801952864397, "learning_rate": 3.6845225015558503e-07, "loss": 0.4416, "step": 8330, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9038817005545287, "success_rate.epoch.env.math": 0.9708313077297035, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8380381086803105, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8687662974414821, "success_rate.epoch.global": 0.9008264462809917, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983198924731183, "tokens_p.mean_in_band": 0.6966145833333334, "tokens_rate.above_band": 0.9872611464968153, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012738853503184714 }, { "epoch": 1.7756710694503621, "grad_norm": 345.2172572080791, "learning_rate": 3.6841887039285223e-07, "loss": 0.2875, "step": 8335, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9039704524469068, "success_rate.epoch.env.math": 0.9708879184861717, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8383233532934131, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8688054435562048, "success_rate.epoch.global": 0.9009628610729024, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948694029850746, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7767362590541116, "grad_norm": 46.50858499775321, "learning_rate": 3.683854844945071e-07, "loss": 0.2299, "step": 8340, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9040590405904059, "success_rate.epoch.env.math": 0.9709583736689255, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.838494018296974, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8688908494132388, "success_rate.epoch.global": 0.9010989010989011, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960227272727272, "tokens_p.mean_in_band": 0.888671875, "tokens_rate.above_band": 0.9927797833935018, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007220216606498195 }, { "epoch": 1.777801448657861, "grad_norm": 42.75726907247912, "learning_rate": 3.683520924848812e-07, "loss": 0.1527, "step": 8345, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8699551569506726, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9487179487179487, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9042357274401474, "success_rate.epoch.env.math": 0.9710144927536232, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8386643233743409, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8685712488758394, "success_rate.epoch.global": 0.9010973936899863, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985119047619048, "tokens_p.mean_in_band": 0.66171875, "tokens_rate.above_band": 0.9861878453038674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013812154696132596 }, { "epoch": 1.7788666382616105, "grad_norm": 56.75882220070855, "learning_rate": 3.683186943883103e-07, "loss": 0.2209, "step": 8350, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9044117647058824, "success_rate.epoch.env.math": 0.9710564399421129, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8387776606954689, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8688230058956432, "success_rate.epoch.global": 0.9012328767123288, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983229712041884, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9947916666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005208333333333333 }, { "epoch": 1.7799318278653602, "grad_norm": 257.8840695143015, "learning_rate": 3.6828529022913473e-07, "loss": 0.2682, "step": 8355, "success_rate.epoch.env.abd": 0.9843260188087775, "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9045871559633027, "success_rate.epoch.env.math": 0.9711121810303323, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8385964912280702, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8688320288163041, "success_rate.epoch.global": 0.9012311901504788, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950810185185185, "tokens_p.mean_in_band": 0.596875, "tokens_rate.above_band": 0.9557522123893806, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04424778761061947 }, { "epoch": 1.7809970174691094, "grad_norm": 130.47379827753818, "learning_rate": 3.6825188003169917e-07, "loss": 0.2674, "step": 8360, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9528023598820059, "success_rate.epoch.env.logic": 0.9046746104491292, "success_rate.epoch.env.math": 0.9711538461538461, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8388227049754731, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8688814790468932, "success_rate.epoch.global": 0.9013661202185792, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971590909090909, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9930555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006944444444444444 }, { "epoch": 1.7820622070728591, "grad_norm": 83.52970371647345, "learning_rate": 3.6821846382035266e-07, "loss": 0.2227, "step": 8365, "success_rate.epoch.env.abd": 0.9844236760124611, "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9047619047619048, "success_rate.epoch.env.math": 0.971195391262602, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8386983904828551, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8688989351859803, "success_rate.epoch.global": 0.9013642564802182, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997633495145631, "tokens_p.mean_in_band": 0.5611049107142857, "tokens_rate.above_band": 0.9735349716446124, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026465028355387523 }, { "epoch": 1.7831273966766084, "grad_norm": 55.241273610000256, "learning_rate": 3.681850416194489e-07, "loss": 0.2441, "step": 8370, "success_rate.epoch.env.abd": 0.9844236760124611, "success_rate.epoch.env.agentgym:alfworld": 0.8716814159292036, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9048490393412626, "success_rate.epoch.env.math": 0.9712368168744008, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8389238294898672, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.868994244666824, "success_rate.epoch.global": 0.9014986376021799, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962143705463183, "tokens_p.mean_in_band": 0.6943359375, "tokens_rate.above_band": 0.9952718676122931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004728132387706856 }, { "epoch": 1.784192586280358, "grad_norm": 92.67297601578905, "learning_rate": 3.6815161345334553e-07, "loss": 0.4485, "step": 8375, "success_rate.epoch.env.abd": 0.984472049689441, "success_rate.epoch.env.agentgym:alfworld": 0.8722466960352423, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9050228310502283, "success_rate.epoch.env.math": 0.9712505989458553, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8391486392184229, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8687661250083368, "success_rate.epoch.global": 0.9014965986394557, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969444444444444, "tokens_p.mean_in_band": 0.469970703125, "tokens_rate.above_band": 0.9656652360515021, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034334763948497854 }, { "epoch": 1.7852577758841073, "grad_norm": 48.337398003004665, "learning_rate": 3.681181793464049e-07, "loss": 0.3813, "step": 8380, "success_rate.epoch.env.abd": 0.984472049689441, "success_rate.epoch.env.agentgym:alfworld": 0.8728070175438597, "success_rate.epoch.env.agentgym:sciworld": 0.9707112970711297, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9051959890610757, "success_rate.epoch.env.math": 0.97131931166348, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8392047436344611, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8688553394110768, "success_rate.epoch.global": 0.9016304347826087, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987623762376238, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.786322965487857, "grad_norm": 498.04105566005234, "learning_rate": 3.680847393229935e-07, "loss": 0.2806, "step": 8385, "success_rate.epoch.env.abd": 0.9845201238390093, "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, "success_rate.epoch.env.agentgym:sciworld": 0.9707112970711297, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9052823315118397, "success_rate.epoch.env.math": 0.9713467048710601, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8390243902439024, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8689769034489306, "success_rate.epoch.global": 0.9016282225237449, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995903558052435, "tokens_p.mean_in_band": 0.345703125, "tokens_rate.above_band": 0.9925650557620818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007434944237918215 }, { "epoch": 1.7873881550916062, "grad_norm": 59.349432049608815, "learning_rate": 3.680512934074822e-07, "loss": 0.2393, "step": 8390, "success_rate.epoch.env.abd": 0.9845201238390093, "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, "success_rate.epoch.env.agentgym:sciworld": 0.9708333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9530791788856305, "success_rate.epoch.env.logic": 0.9046321525885559, "success_rate.epoch.env.math": 0.9713876967095851, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8391364902506964, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8689553535943197, "success_rate.epoch.global": 0.9016260162601626, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000140134529148, "tokens_p.mean_in_band": 0.6315104166666666, "tokens_rate.above_band": 0.9966480446927374, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0033519553072625698 }, { "epoch": 1.7884533446953559, "grad_norm": 127.47583311210774, "learning_rate": 3.680178416242461e-07, "loss": 0.3413, "step": 8395, "success_rate.epoch.env.abd": 0.9845201238390093, "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, "success_rate.epoch.env.agentgym:sciworld": 0.9708333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9530791788856305, "success_rate.epoch.env.logic": 0.9039855072463768, "success_rate.epoch.env.math": 0.9714285714285714, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8386648122392212, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8688574037184409, "success_rate.epoch.global": 0.9013531799729364, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9968243740795287, "tokens_p.mean_in_band": 0.5754206730769231, "tokens_rate.above_band": 0.9631205673758865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03687943262411347 }, { "epoch": 1.7895185342991051, "grad_norm": 73.65532766359051, "learning_rate": 3.6798438399766464e-07, "loss": 0.2555, "step": 8400, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8744588744588745, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9530791788856305, "success_rate.epoch.env.logic": 0.9039855072463768, "success_rate.epoch.env.math": 0.97144217039505, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8388329281000347, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8690041720347729, "success_rate.epoch.global": 0.9014864864864864, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984517601043025, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7905837239028548, "grad_norm": 111.28770294088349, "learning_rate": 3.679509205521215e-07, "loss": 0.31, "step": 8405, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9039855072463768, "success_rate.epoch.env.math": 0.9714693295292439, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8388214904679376, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8690672668008271, "success_rate.epoch.global": 0.9014844804318488, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969019396551724, "tokens_p.mean_in_band": 0.46986607142857145, "tokens_rate.above_band": 0.9851380042462845, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014861995753715499 }, { "epoch": 1.791648913506604, "grad_norm": 418.5759581172558, "learning_rate": 3.679174513120046e-07, "loss": 0.2362, "step": 8410, "success_rate.epoch.env.abd": 0.9847094801223242, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9041591320072333, "success_rate.epoch.env.math": 0.9714964370546318, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.83898891966759, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8691201115704097, "success_rate.epoch.global": 0.9016172506738545, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989224137931034, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7927141031103537, "grad_norm": 96.83658454131306, "learning_rate": 3.678839763017061e-07, "loss": 0.2445, "step": 8415, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9533527696793003, "success_rate.epoch.env.logic": 0.9041591320072333, "success_rate.epoch.env.math": 0.9715504978662873, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.838865836791148, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8691304743689695, "success_rate.epoch.global": 0.9016150740242261, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948620495495496, "tokens_p.mean_in_band": 0.64532470703125, "tokens_rate.above_band": 0.9823008849557522, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017699115044247787 }, { "epoch": 1.793779292714103, "grad_norm": 48.72980061970136, "learning_rate": 3.6785049554562225e-07, "loss": 0.4237, "step": 8420, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9034296028880866, "success_rate.epoch.env.math": 0.9716043539990534, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8390328151986183, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.869096556899407, "success_rate.epoch.global": 0.9016129032258065, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976929530201343, "tokens_p.mean_in_band": 0.5660511363636364, "tokens_rate.above_band": 0.9793427230046948, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020657276995305163 }, { "epoch": 1.7948444823178527, "grad_norm": 51.40452171372572, "learning_rate": 3.678170090681537e-07, "loss": 0.3246, "step": 8425, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9035166816952209, "success_rate.epoch.env.math": 0.9716580066131318, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.838909968954812, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8687907607780734, "success_rate.epoch.global": 0.901476510067114, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953972868217055, "tokens_p.mean_in_band": 0.6076171875, "tokens_rate.above_band": 0.8657718120805369, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1342281879194631 }, { "epoch": 1.795909671921602, "grad_norm": 58.47596334101638, "learning_rate": 3.677835168937052e-07, "loss": 0.3301, "step": 8430, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.9027902790279028, "success_rate.epoch.env.math": 0.971253534401508, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8386763185108583, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8686789690498805, "success_rate.epoch.global": 0.9012064343163538, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9959610133495146, "tokens_p.mean_below_band": 8.866190910339355e-07, "tokens_p.mean_in_band": 0.3921875, "tokens_rate.above_band": 0.9751479289940829, "tokens_rate.below_band": 0.001183431952662722, "tokens_rate.in_band": 0.023668639053254437 }, { "epoch": 1.7969748615253516, "grad_norm": 219.06314510997606, "learning_rate": 3.6775001904668545e-07, "loss": 0.3121, "step": 8435, "success_rate.epoch.env.abd": 0.9848024316109423, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.9028776978417267, "success_rate.epoch.env.math": 0.971307619943556, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8388984509466437, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8687162391263917, "success_rate.epoch.global": 0.9013386880856761, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9918478260869565, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9019607843137255, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09803921568627451 }, { "epoch": 1.7980400511291008, "grad_norm": 39.81884928918224, "learning_rate": 3.6771651555150746e-07, "loss": 0.3656, "step": 8440, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.9013452914798207, "success_rate.epoch.env.math": 0.9713211095439587, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8388316151202749, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8685762664581801, "success_rate.epoch.global": 0.9010695187165776, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971942724458205, "tokens_p.mean_in_band": 0.5909598214285714, "tokens_rate.above_band": 0.9584569732937686, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04154302670623145 }, { "epoch": 1.7991052407328505, "grad_norm": 84.77316424565245, "learning_rate": 3.676830064325885e-07, "loss": 0.3008, "step": 8445, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.9013452914798207, "success_rate.epoch.env.math": 0.9713883677298312, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8390528483184626, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8686132256541833, "success_rate.epoch.global": 0.9012016021361816, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961168639053254, "tokens_p.mean_in_band": 0.7174479166666666, "tokens_rate.above_band": 0.9825581395348837, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01744186046511628 }, { "epoch": 1.8001704303366, "grad_norm": 211.15306640923058, "learning_rate": 3.676494917143496e-07, "loss": 0.3796, "step": 8450, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9536231884057971, "success_rate.epoch.env.logic": 0.9014336917562724, "success_rate.epoch.env.math": 0.971441947565543, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8389307745030843, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8686256804101397, "success_rate.epoch.global": 0.9012, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99609375, "tokens_p.mean_in_band": 0.7559344951923077, "tokens_rate.above_band": 0.9078014184397163, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09219858156028368 }, { "epoch": 1.8012356199403494, "grad_norm": 54.855635536562424, "learning_rate": 3.676159714212161e-07, "loss": 0.2858, "step": 8455, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9016100178890877, "success_rate.epoch.env.math": 0.9714686623012161, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.839151266255989, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8687106480061041, "success_rate.epoch.global": 0.9013315579227696, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970737632508834, "tokens_p.mean_in_band": 0.8058035714285714, "tokens_rate.above_band": 0.987783595113438, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012216404886561954 }, { "epoch": 1.8023008095440989, "grad_norm": 31.371114937652205, "learning_rate": 3.675824455776174e-07, "loss": 0.3861, "step": 8460, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8675213675213675, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9016100178890877, "success_rate.epoch.env.math": 0.9714953271028037, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8387978142076503, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8680010643947057, "success_rate.epoch.global": 0.9009308510638298, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9985965568862275, "tokens_p.mean_in_band": 0.41158854166666664, "tokens_rate.above_band": 0.9570200573065902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04297994269340974 }, { "epoch": 1.8033659991478483, "grad_norm": 141.56172827880636, "learning_rate": 3.6754891420798683e-07, "loss": 0.3029, "step": 8465, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9008928571428572, "success_rate.epoch.env.math": 0.9715086408220458, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8387316740538697, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8676479212091494, "success_rate.epoch.global": 0.90066401062417, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.00035140562249, "tokens_p.mean_in_band": 0.505615234375, "tokens_rate.above_band": 0.9873116574147502, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012688342585249802 }, { "epoch": 1.8044311887515978, "grad_norm": 125.1743919274533, "learning_rate": 3.6751537733676183e-07, "loss": 0.2639, "step": 8470, "success_rate.epoch.env.abd": 0.984984984984985, "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9009812667261374, "success_rate.epoch.env.math": 0.971535230984601, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8389513108614233, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.867690751817321, "success_rate.epoch.global": 0.9007957559681697, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971804511278195, "tokens_p.mean_in_band": 0.865234375, "tokens_rate.above_band": 0.9708029197080292, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029197080291970802 }, { "epoch": 1.8054963783553473, "grad_norm": 136.04580099448177, "learning_rate": 3.6748183498838383e-07, "loss": 0.2773, "step": 8475, "success_rate.epoch.env.abd": 0.9850299401197605, "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9538904899135446, "success_rate.epoch.env.logic": 0.9011576135351737, "success_rate.epoch.env.math": 0.9710955710955711, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8388303298197892, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8676720177712451, "success_rate.epoch.global": 0.9006622516556292, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997378587196468, "tokens_p.mean_in_band": 0.5037006578947368, "tokens_rate.above_band": 0.9794594594594594, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02054054054054054 }, { "epoch": 1.8065615679590967, "grad_norm": 349.799537670917, "learning_rate": 3.6744828718729826e-07, "loss": 0.3093, "step": 8480, "success_rate.epoch.env.abd": 0.9850299401197605, "success_rate.epoch.env.agentgym:alfworld": 0.864406779661017, "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9511494252873564, "success_rate.epoch.env.logic": 0.9012455516014235, "success_rate.epoch.env.math": 0.9711493718008376, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8383152173913043, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8673994455653748, "success_rate.epoch.global": 0.9003968253968254, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9949678375252964, "tokens_p.mean_in_band": 0.5573410700363826, "tokens_rate.above_band": 0.8779187817258883, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12208121827411167 }, { "epoch": 1.8076267575628462, "grad_norm": 66.9464921394573, "learning_rate": 3.674147339579545e-07, "loss": 0.5047, "step": 8485, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8649789029535865, "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9511494252873564, "success_rate.epoch.env.logic": 0.90150842945874, "success_rate.epoch.env.math": 0.9711761971176197, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.838140481845945, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8674659708078494, "success_rate.epoch.global": 0.9003963011889036, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992288961038961, "tokens_p.mean_in_band": 0.6330729166666667, "tokens_rate.above_band": 0.9808917197452229, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01910828025477707 }, { "epoch": 1.8086919471665956, "grad_norm": 105.73409208723615, "learning_rate": 3.673811753248059e-07, "loss": 0.1991, "step": 8490, "success_rate.epoch.env.abd": 0.9851190476190477, "success_rate.epoch.env.agentgym:alfworld": 0.8649789029535865, "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.9016829052258636, "success_rate.epoch.env.math": 0.9712163416898792, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8383050847457627, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8675172087156696, "success_rate.epoch.global": 0.9005277044854881, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984439834024896, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9877049180327869, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012295081967213115 }, { "epoch": 1.809757136770345, "grad_norm": 87.4635664032167, "learning_rate": 3.6734761131230987e-07, "loss": 0.1867, "step": 8495, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.8649789029535865, "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.9017699115044248, "success_rate.epoch.env.math": 0.9712830013895322, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8384693532001355, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8675501261303346, "success_rate.epoch.global": 0.9006587615283268, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969682835820896, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.8108223263740946, "grad_norm": 123.38034576458628, "learning_rate": 3.673140419449274e-07, "loss": 0.4046, "step": 8500, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.8649789029535865, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.9019434628975265, "success_rate.epoch.env.math": 0.971309578898658, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8387423935091278, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8676036145660077, "success_rate.epoch.global": 0.9007894736842105, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9933467741935483, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9451219512195121, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054878048780487805 }, { "epoch": 1.811887515977844, "grad_norm": 186.8125349768505, "learning_rate": 3.6728046724712376e-07, "loss": 0.3676, "step": 8505, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.9020300088261254, "success_rate.epoch.env.math": 0.9713361072584373, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8383940620782726, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.867251830722286, "success_rate.epoch.global": 0.9005256241787122, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.997314453125, "tokens_p.mean_in_band": 0.5842927631578947, "tokens_rate.above_band": 0.9309090909090909, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06909090909090909 }, { "epoch": 1.8129527055815935, "grad_norm": 245.92692628209977, "learning_rate": 3.6724688724336796e-07, "loss": 0.151, "step": 8510, "success_rate.epoch.env.abd": 0.985207100591716, "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.9022026431718062, "success_rate.epoch.env.math": 0.9713890170742963, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8385574654533199, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.867291180120516, "success_rate.epoch.global": 0.9006561679790026, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9884733606557377, "tokens_p.mean_in_band": 0.884765625, "tokens_rate.above_band": 0.9838709677419355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016129032258064516 }, { "epoch": 1.814017895185343, "grad_norm": 43.71584633814408, "learning_rate": 3.672133019581328e-07, "loss": 0.3417, "step": 8515, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512893982808023, "success_rate.epoch.env.logic": 0.9022887323943662, "success_rate.epoch.env.math": 0.9714153988012909, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8388290713324361, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8673340068372444, "success_rate.epoch.global": 0.9007863695937091, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946120689655172, "tokens_p.mean_in_band": 0.7261284722222222, "tokens_rate.above_band": 0.928, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.072 }, { "epoch": 1.8150830847890924, "grad_norm": 59.75579659371212, "learning_rate": 3.671797114158949e-07, "loss": 0.3303, "step": 8520, "success_rate.epoch.env.abd": 0.9853372434017595, "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.9023746701846965, "success_rate.epoch.env.math": 0.9714548802946593, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8389374579690653, "success_rate.epoch.env.webshop": 0.9761904761904762, "success_rate.epoch.env_macro_mean": 0.8674350158768592, "success_rate.epoch.global": 0.9009162303664922, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9940124671916011, "tokens_p.mean_in_band": 0.8109375, "tokens_rate.above_band": 0.9934810951760105, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00651890482398957 }, { "epoch": 1.8161482743928419, "grad_norm": 116.2575874490211, "learning_rate": 3.67146115641135e-07, "loss": 0.4174, "step": 8525, "success_rate.epoch.env.abd": 0.9853801169590644, "success_rate.epoch.env.agentgym:alfworld": 0.8613445378151261, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.9025460930640913, "success_rate.epoch.env.math": 0.9714811407543699, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.839099764863957, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8691618728369778, "success_rate.epoch.global": 0.9010457516339869, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984296482412061, "tokens_p.mean_in_band": 0.7900390625, "tokens_rate.above_band": 0.9900497512437811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009950248756218905 }, { "epoch": 1.8172134639965913, "grad_norm": 65.82815264964785, "learning_rate": 3.671125146583374e-07, "loss": 0.1464, "step": 8530, "success_rate.epoch.env.abd": 0.9854651162790697, "success_rate.epoch.env.agentgym:alfworld": 0.8625, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.9025460930640913, "success_rate.epoch.env.math": 0.9714942528735632, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8393695506371562, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8693003600549115, "success_rate.epoch.global": 0.9011749347258485, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975294729542302, "tokens_p.mean_in_band": 0.880859375, "tokens_rate.above_band": 0.9972337482710927, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0027662517289073307 }, { "epoch": 1.8182786536003408, "grad_norm": 185.3163671114768, "learning_rate": 3.670789084919902e-07, "loss": 0.2633, "step": 8535, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8625, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.9028021015761821, "success_rate.epoch.env.math": 0.9715335169880625, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8394772117962467, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8694516851529612, "success_rate.epoch.global": 0.9013037809647979, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967981557377049, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.8193438432040905, "grad_norm": 119.93006513072066, "learning_rate": 3.6704529716658537e-07, "loss": 0.2836, "step": 8540, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8625, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.902972027972028, "success_rate.epoch.env.math": 0.9715726730857405, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8396921017402945, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8695232139838737, "success_rate.epoch.global": 0.9014322916666667, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956380208333333, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.995850622406639, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004149377593360996 }, { "epoch": 1.8204090328078397, "grad_norm": 114.32632587520037, "learning_rate": 3.6701168070661856e-07, "loss": 0.2852, "step": 8545, "success_rate.epoch.env.abd": 0.9855491329479769, "success_rate.epoch.env.agentgym:alfworld": 0.8625, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.903056768558952, "success_rate.epoch.env.math": 0.9711406321575813, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8400133600534402, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8695246543968806, "success_rate.epoch.global": 0.9014304291287386, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9932484567901234, "tokens_p.mean_in_band": 0.7776227678571429, "tokens_rate.above_band": 0.9204545454545454, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07954545454545454 }, { "epoch": 1.8214742224115894, "grad_norm": 83.38526281362576, "learning_rate": 3.669780591365892e-07, "loss": 0.2369, "step": 8550, "success_rate.epoch.env.abd": 0.9855491329479769, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9514285714285714, "success_rate.epoch.env.logic": 0.9031413612565445, "success_rate.epoch.env.math": 0.9711934156378601, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8401201602136181, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8696604631811379, "success_rate.epoch.global": 0.9015584415584416, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977779878971256, "tokens_p.mean_in_band": 0.7903645833333334, "tokens_rate.above_band": 0.9954819277108434, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004518072289156626 }, { "epoch": 1.8225394120153386, "grad_norm": 92.90295711310547, "learning_rate": 3.669444324810006e-07, "loss": 0.2809, "step": 8555, "success_rate.epoch.env.abd": 0.9855907780979827, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9515669515669516, "success_rate.epoch.env.logic": 0.9031413612565445, "success_rate.epoch.env.math": 0.9712460063897763, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8396666666666667, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8696403834078059, "success_rate.epoch.global": 0.9014267185473411, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989708083832335, "tokens_p.mean_in_band": 0.5501302083333334, "tokens_rate.above_band": 0.9823529411764705, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01764705882352941 }, { "epoch": 1.8236046016190883, "grad_norm": 62.04636971662324, "learning_rate": 3.6691080076435945e-07, "loss": 0.1885, "step": 8560, "success_rate.epoch.env.abd": 0.9855907780979827, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9515669515669516, "success_rate.epoch.env.logic": 0.9032258064516129, "success_rate.epoch.env.math": 0.971285323609845, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8393213572854291, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8696202427745241, "success_rate.epoch.global": 0.9012953367875648, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9932598039215687, "tokens_p.mean_in_band": 0.58984375, "tokens_rate.above_band": 0.9357798165137615, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06422018348623854 }, { "epoch": 1.8246697912228376, "grad_norm": 80.86503687070869, "learning_rate": 3.668771640111764e-07, "loss": 0.3293, "step": 8565, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9517045454545454, "success_rate.epoch.env.logic": 0.9033942558746736, "success_rate.epoch.env.math": 0.9713245334547109, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8393747921516461, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697022381964132, "success_rate.epoch.global": 0.9014230271668823, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9963235294117647, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.8257349808265873, "grad_norm": 75.49193121156453, "learning_rate": 3.668435222459656e-07, "loss": 0.1695, "step": 8570, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9517045454545454, "success_rate.epoch.env.logic": 0.9034782608695652, "success_rate.epoch.env.math": 0.9713766469786461, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8396414342629482, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697388527991524, "success_rate.epoch.global": 0.9015503875968992, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9877136752136753, "tokens_p.mean_in_band": 0.7712673611111112, "tokens_rate.above_band": 0.9285714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07142857142857142 }, { "epoch": 1.8268001704303365, "grad_norm": 147.24169525468326, "learning_rate": 3.66809875493245e-07, "loss": 0.275, "step": 8575, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9518413597733711, "success_rate.epoch.env.logic": 0.9028620988725065, "success_rate.epoch.env.math": 0.9714026327734907, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8398541114058355, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697169723681977, "success_rate.epoch.global": 0.9015483870967742, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994676853707415, "tokens_p.mean_in_band": 0.607421875, "tokens_rate.above_band": 0.9960079840319361, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003992015968063872 }, { "epoch": 1.8278653600340862, "grad_norm": 79.5571842198206, "learning_rate": 3.6677622377753603e-07, "loss": 0.2458, "step": 8580, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9518413597733711, "success_rate.epoch.env.logic": 0.9028620988725065, "success_rate.epoch.env.math": 0.9714544630720435, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8397881496193313, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697667028900052, "success_rate.epoch.global": 0.9015463917525773, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975665983606558, "tokens_p.mean_in_band": 0.728515625, "tokens_rate.above_band": 0.976, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024 }, { "epoch": 1.8289305496378354, "grad_norm": 154.21009305469008, "learning_rate": 3.667425671233639e-07, "loss": 0.6609, "step": 8585, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8647540983606558, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.9518413597733711, "success_rate.epoch.env.logic": 0.9028620988725065, "success_rate.epoch.env.math": 0.9714673913043478, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8394980184940555, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.869795842265548, "success_rate.epoch.global": 0.9014157014157014, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976388888888889, "tokens_p.mean_in_band": 0.6024305555555556, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 1.829995739241585, "grad_norm": 335.7108853351841, "learning_rate": 3.667089055552573e-07, "loss": 0.3227, "step": 8590, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8647540983606558, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.952112676056338, "success_rate.epoch.env.logic": 0.902946273830156, "success_rate.epoch.env.math": 0.9715189873417721, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8393269548003959, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8698172989550372, "success_rate.epoch.global": 0.901413881748072, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978505291005291, "tokens_p.mean_in_band": 0.4482421875, "tokens_rate.above_band": 0.9792746113989638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02072538860103627 }, { "epoch": 1.8310609288453343, "grad_norm": 107.95032943813455, "learning_rate": 3.666752390977485e-07, "loss": 0.3423, "step": 8595, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.952112676056338, "success_rate.epoch.env.logic": 0.902946273830156, "success_rate.epoch.env.math": 0.9715575620767495, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8393151135989463, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8698699132842964, "success_rate.epoch.global": 0.9014120667522465, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948795180722891, "tokens_p.mean_in_band": 0.6761067708333334, "tokens_rate.above_band": 0.9718969555035128, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02810304449648712 }, { "epoch": 1.832126118449084, "grad_norm": 56.83198867234907, "learning_rate": 3.666415677753735e-07, "loss": 0.2797, "step": 8600, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.952112676056338, "success_rate.epoch.env.logic": 0.9031979256698358, "success_rate.epoch.env.math": 0.9715960324616771, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8388687931601447, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8698557134466424, "success_rate.epoch.global": 0.9012820512820513, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9940476190476191, "tokens_p.mean_in_band": 0.6139914772727273, "tokens_rate.above_band": 0.9051724137931034, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09482758620689655 }, { "epoch": 1.8331913080528333, "grad_norm": 0.0, "learning_rate": 3.666078916126716e-07, "loss": 0.1571, "step": 8605, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.952247191011236, "success_rate.epoch.env.logic": 0.9032815198618307, "success_rate.epoch.env.math": 0.9712100764732343, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8390275952693824, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8698548911982503, "success_rate.epoch.global": 0.901280409731114, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996424485125858, "tokens_p.mean_below_band": 2.2118911147117615e-08, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.997716894977169, "tokens_rate.below_band": 0.001141552511415525, "tokens_rate.in_band": 0.001141552511415525 }, { "epoch": 1.834256497656583, "grad_norm": 109.81075931162576, "learning_rate": 3.665742106341857e-07, "loss": 0.3689, "step": 8610, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.952247191011236, "success_rate.epoch.env.logic": 0.9026701119724375, "success_rate.epoch.env.math": 0.9708258527827648, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8388049901510177, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697441424075024, "success_rate.epoch.global": 0.9010230179028133, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.6555555555555556, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969389619883041, "tokens_p.mean_in_band": 0.602796052631579, "tokens_rate.above_band": 0.9473684210526315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05263157894736842 }, { "epoch": 1.8353216872603322, "grad_norm": 48.26808723590609, "learning_rate": 3.665405248644624e-07, "loss": 0.1927, "step": 8615, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.952247191011236, "success_rate.epoch.env.logic": 0.9020618556701031, "success_rate.epoch.env.math": 0.9708650829224563, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8389635946211873, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697105525517099, "success_rate.epoch.global": 0.901021711366539, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971875, "tokens_p.mean_in_band": 0.646484375, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 1.8363868768640819, "grad_norm": 91.16632230601893, "learning_rate": 3.665068343280516e-07, "loss": 0.2845, "step": 8620, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8653061224489796, "success_rate.epoch.env.agentgym:sciworld": 0.9721115537848606, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9512195121951219, "success_rate.epoch.env.ded": 0.952513966480447, "success_rate.epoch.env.logic": 0.9021459227467811, "success_rate.epoch.env.math": 0.9708781362007168, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8392272429600524, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697777432742441, "success_rate.epoch.global": 0.9011479591836735, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0005634014423077, "tokens_p.mean_in_band": 0.8486328125, "tokens_rate.above_band": 0.9952153110047847, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004784688995215311 }, { "epoch": 1.8374520664678313, "grad_norm": 70.45980748244546, "learning_rate": 3.6647313904950667e-07, "loss": 0.289, "step": 8625, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8663967611336032, "success_rate.epoch.env.agentgym:sciworld": 0.9721115537848606, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.952513966480447, "success_rate.epoch.env.logic": 0.9021459227467811, "success_rate.epoch.env.math": 0.970917225950783, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8393850179914949, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8700040745190315, "success_rate.epoch.global": 0.9012738853503185, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972222222222222, "tokens_p.mean_in_band": 0.8765625, "tokens_rate.above_band": 0.9915254237288136, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00847457627118644 }, { "epoch": 1.8385172560715808, "grad_norm": 90.94651430453813, "learning_rate": 3.6643943905338454e-07, "loss": 0.2248, "step": 8630, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8663967611336032, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.952513966480447, "success_rate.epoch.env.logic": 0.902229845626072, "success_rate.epoch.env.math": 0.9709562109025917, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8395949036262659, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8700543704727849, "success_rate.epoch.global": 0.9013994910941476, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987098623853211, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.8395824456753302, "grad_norm": 63.054858904476646, "learning_rate": 3.664057343642455e-07, "loss": 0.1912, "step": 8635, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9526462395543176, "success_rate.epoch.env.logic": 0.9023136246786633, "success_rate.epoch.env.math": 0.9709691826708352, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8395826540593414, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697564831092058, "success_rate.epoch.global": 0.9012706480304955, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998046875, "tokens_p.mean_in_band": 0.6958705357142857, "tokens_rate.above_band": 0.9846153846153847, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015384615384615385 }, { "epoch": 1.8406476352790797, "grad_norm": 18.50344239368859, "learning_rate": 3.663720250066533e-07, "loss": 0.1891, "step": 8640, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8629032258064516, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9527777777777777, "success_rate.epoch.env.logic": 0.9016253207869974, "success_rate.epoch.env.math": 0.9710080285459411, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8397915988277435, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697283944705969, "success_rate.epoch.global": 0.901269035532995, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000170068027211, "tokens_p.mean_in_band": 0.5756578947368421, "tokens_rate.above_band": 0.9748010610079576, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025198938992042442 }, { "epoch": 1.8417128248828292, "grad_norm": 144.5788589554126, "learning_rate": 3.6633831100517505e-07, "loss": 0.4581, "step": 8645, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8634538152610441, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9527777777777777, "success_rate.epoch.env.logic": 0.9017933390264731, "success_rate.epoch.env.math": 0.9710338680926917, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8394018205461639, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.869760637103255, "success_rate.epoch.global": 0.9011406844106464, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953703703703703, "tokens_p.mean_in_band": 0.5301846590909091, "tokens_rate.above_band": 0.9310344827586207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06896551724137931 }, { "epoch": 1.8427780144865786, "grad_norm": 68.72584600900457, "learning_rate": 3.6630459238438125e-07, "loss": 0.2544, "step": 8650, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8650793650793651, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9527777777777777, "success_rate.epoch.env.logic": 0.9010238907849829, "success_rate.epoch.env.math": 0.9710596616206589, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8392857142857143, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.869830254270923, "success_rate.epoch.global": 0.9010126582278482, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0003861202635915, "tokens_p.mean_in_band": 0.5553977272727273, "tokens_rate.above_band": 0.982200647249191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01779935275080906 }, { "epoch": 1.843843204090328, "grad_norm": 151.59932566960663, "learning_rate": 3.6627086916884584e-07, "loss": 0.362, "step": 8655, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8616600790513834, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9529085872576177, "success_rate.epoch.env.logic": 0.9011082693947144, "success_rate.epoch.env.math": 0.9710854092526691, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8392220421393841, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8695355249570386, "success_rate.epoch.global": 0.9008849557522124, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983766233766234, "tokens_p.mean_below_band": 5.3085386753082275e-08, "tokens_p.mean_in_band": 0.3076171875, "tokens_rate.above_band": 0.9903536977491961, "tokens_rate.below_band": 0.003215434083601286, "tokens_rate.in_band": 0.006430868167202572 }, { "epoch": 1.8449083936940776, "grad_norm": 90.18613340694525, "learning_rate": 3.6623714138314607e-07, "loss": 0.1854, "step": 8660, "success_rate.epoch.env.abd": 0.9857954545454546, "success_rate.epoch.env.agentgym:alfworld": 0.8616600790513834, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9529085872576177, "success_rate.epoch.env.logic": 0.9011082693947144, "success_rate.epoch.env.math": 0.9711239449133718, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8394822006472492, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8695969670748326, "success_rate.epoch.global": 0.901010101010101, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9950657894736842, "tokens_p.mean_in_band": 0.8, "tokens_rate.above_band": 0.9715909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028409090909090908 }, { "epoch": 1.845973583297827, "grad_norm": 82.94777241142698, "learning_rate": 3.6620340905186247e-07, "loss": 0.3565, "step": 8665, "success_rate.epoch.env.abd": 0.9857954545454546, "success_rate.epoch.env.agentgym:alfworld": 0.8616600790513834, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9529085872576177, "success_rate.epoch.env.logic": 0.9012765957446809, "success_rate.epoch.env.math": 0.9711623779946761, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8393665158371041, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697059211051233, "success_rate.epoch.global": 0.9010088272383354, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972516286644951, "tokens_p.mean_in_band": 0.7299107142857143, "tokens_rate.above_band": 0.9887278582930756, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011272141706924315 }, { "epoch": 1.8470387729015765, "grad_norm": 126.21880070728955, "learning_rate": 3.6616967219957894e-07, "loss": 0.3996, "step": 8670, "success_rate.epoch.env.abd": 0.9857954545454546, "success_rate.epoch.env.agentgym:alfworld": 0.8616600790513834, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9530386740331491, "success_rate.epoch.env.logic": 0.9013605442176871, "success_rate.epoch.env.math": 0.9707835325365206, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8395221181788828, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697050840262288, "success_rate.epoch.global": 0.9010075566750629, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964978448275862, "tokens_p.mean_below_band": 2.7830537874251604e-10, "tokens_p.mean_in_band": 0.703125, "tokens_rate.above_band": 0.9872340425531915, "tokens_rate.below_band": 0.00425531914893617, "tokens_rate.in_band": 0.00851063829787234 }, { "epoch": 1.848103962505326, "grad_norm": 124.1414342864317, "learning_rate": 3.6613593085088263e-07, "loss": 0.3269, "step": 8675, "success_rate.epoch.env.abd": 0.9858757062146892, "success_rate.epoch.env.agentgym:alfworld": 0.8622047244094488, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.953168044077135, "success_rate.epoch.env.logic": 0.9015280135823429, "success_rate.epoch.env.math": 0.9707964601769912, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8393548387096774, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8697748462632476, "success_rate.epoch.global": 0.9010062893081761, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996357202331391, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9950289975144988, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004971002485501243 }, { "epoch": 1.8491691521090754, "grad_norm": 27.568502548372212, "learning_rate": 3.6610218503036403e-07, "loss": 0.3363, "step": 8680, "success_rate.epoch.env.abd": 0.9858757062146892, "success_rate.epoch.env.agentgym:alfworld": 0.8622047244094488, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9505494505494505, "success_rate.epoch.env.logic": 0.9008474576271186, "success_rate.epoch.env.math": 0.9708351745470615, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8391878826941669, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8691408925887815, "success_rate.epoch.global": 0.9006281407035176, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.4333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9953870387038704, "tokens_p.mean_in_band": 0.7041193181818182, "tokens_rate.above_band": 0.9099099099099099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09009009009009009 }, { "epoch": 1.8502343417128249, "grad_norm": 231.50158626001303, "learning_rate": 3.6606843476261683e-07, "loss": 0.3401, "step": 8685, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8622047244094488, "success_rate.epoch.env.agentgym:sciworld": 0.9724409448818898, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9505494505494505, "success_rate.epoch.env.logic": 0.9009314140558848, "success_rate.epoch.env.math": 0.9708609271523179, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.839073060830383, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8691575440621393, "success_rate.epoch.global": 0.9006273525721455, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987824675324676, "tokens_p.mean_in_band": 0.1513671875, "tokens_rate.above_band": 0.9935483870967742, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0064516129032258064 }, { "epoch": 1.8512995313165743, "grad_norm": 33.90291242663054, "learning_rate": 3.6603468007223797e-07, "loss": 0.2315, "step": 8690, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8622047244094488, "success_rate.epoch.env.agentgym:sciworld": 0.9724409448818898, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9505494505494505, "success_rate.epoch.env.logic": 0.9009314140558848, "success_rate.epoch.env.math": 0.9708994708994709, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8391136801541426, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8691647407049495, "success_rate.epoch.global": 0.9006265664160401, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9890510948905109, "tokens_p.mean_in_band": 0.78173828125, "tokens_rate.above_band": 0.9448275862068966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05517241379310345 }, { "epoch": 1.8523647209203238, "grad_norm": 167.57781940090902, "learning_rate": 3.6600092098382763e-07, "loss": 0.266, "step": 8695, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.86328125, "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9480874316939891, "success_rate.epoch.env.logic": 0.9010152284263959, "success_rate.epoch.env.math": 0.9708994708994709, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8393200769724182, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8690749945822067, "success_rate.epoch.global": 0.9006257822277848, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972540824261276, "tokens_p.mean_in_band": 0.7094029017857143, "tokens_rate.above_band": 0.9945862335653519, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005413766434648105 }, { "epoch": 1.8534299105240732, "grad_norm": 163.77829352756117, "learning_rate": 3.6596715752198924e-07, "loss": 0.3513, "step": 8700, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9480874316939891, "success_rate.epoch.env.logic": 0.8995780590717299, "success_rate.epoch.env.math": 0.9709251101321585, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8395259449071109, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8690137507080885, "success_rate.epoch.global": 0.9005, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978703703703704, "tokens_p.mean_in_band": 0.5137746710526315, "tokens_rate.above_band": 0.9594882729211087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04051172707889126 }, { "epoch": 1.8544951001278227, "grad_norm": 36.36148672150636, "learning_rate": 3.659333897113293e-07, "loss": 0.2406, "step": 8705, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, "success_rate.epoch.env.agentgym:sciworld": 0.97265625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9483695652173914, "success_rate.epoch.env.logic": 0.8995780590717299, "success_rate.epoch.env.math": 0.9709634843818742, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8397312859884837, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8690713033668747, "success_rate.epoch.global": 0.9006242197253433, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961630286493861, "tokens_p.mean_in_band": 0.6171875, "tokens_rate.above_band": 0.9986376021798365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0013623978201634877 }, { "epoch": 1.8555602897315722, "grad_norm": 53.63600077529118, "learning_rate": 3.658996175764576e-07, "loss": 0.1747, "step": 8710, "success_rate.epoch.env.abd": 0.9859943977591037, "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, "success_rate.epoch.env.agentgym:sciworld": 0.97265625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9483695652173914, "success_rate.epoch.env.logic": 0.8996627318718381, "success_rate.epoch.env.math": 0.9710017574692443, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8399872245289045, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8691093239129862, "success_rate.epoch.global": 0.9007481296758105, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9880952380952381, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9921259842519685, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007874015748031496 }, { "epoch": 1.8566254793353218, "grad_norm": 175.93561049497623, "learning_rate": 3.65865841141987e-07, "loss": 0.3368, "step": 8715, "success_rate.epoch.env.abd": 0.9859943977591037, "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, "success_rate.epoch.env.agentgym:sciworld": 0.97265625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9483695652173914, "success_rate.epoch.env.logic": 0.8998316498316499, "success_rate.epoch.env.math": 0.9710399297937692, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8401913875598086, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8691947597803913, "success_rate.epoch.global": 0.9008717310087173, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957482993197279, "tokens_p.mean_in_band": 0.7845982142857143, "tokens_rate.above_band": 0.9545454545454546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045454545454545456 }, { "epoch": 1.857690668939071, "grad_norm": 71.15789116107545, "learning_rate": 3.658320604325335e-07, "loss": 0.2932, "step": 8720, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8638132295719845, "success_rate.epoch.env.agentgym:sciworld": 0.97265625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9483695652173914, "success_rate.epoch.env.logic": 0.8998316498316499, "success_rate.epoch.env.math": 0.9711033274956217, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8403950302644154, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.869222592707703, "success_rate.epoch.global": 0.9009950248756219, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9803921568627451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0196078431372549 }, { "epoch": 1.8587558585428208, "grad_norm": 0.0, "learning_rate": 3.6579827547271627e-07, "loss": 0.2052, "step": 8725, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8643410852713178, "success_rate.epoch.env.agentgym:sciworld": 0.97265625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9486486486486486, "success_rate.epoch.env.logic": 0.8999158957106812, "success_rate.epoch.env.math": 0.9706911636045494, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8405981546293351, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.869284605933473, "success_rate.epoch.global": 0.9009937888198758, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976697736351531, "tokens_p.mean_in_band": 0.3794642857142857, "tokens_rate.above_band": 0.9907651715039578, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009234828496042216 }, { "epoch": 1.85982104814657, "grad_norm": 172.7191476254395, "learning_rate": 3.6576448628715754e-07, "loss": 0.4475, "step": 8730, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8643410852713178, "success_rate.epoch.env.agentgym:sciworld": 0.97265625, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9486486486486486, "success_rate.epoch.env.logic": 0.8999158957106812, "success_rate.epoch.env.math": 0.9707423580786027, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8402667513496348, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8692591324056868, "success_rate.epoch.global": 0.9008684863523573, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9922520661157025, "tokens_p.mean_in_band": 0.4060329861111111, "tokens_rate.above_band": 0.9307692307692308, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06923076923076923 }, { "epoch": 1.8608862377503197, "grad_norm": 76.55022844979887, "learning_rate": 3.657306929004827e-07, "loss": 0.2583, "step": 8735, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, "success_rate.epoch.env.agentgym:sciworld": 0.9727626459143969, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9486486486486486, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9707933740191804, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8401015228426396, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8693136839721273, "success_rate.epoch.global": 0.9008674101610905, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972646882494005, "tokens_p.mean_in_band": 0.7354166666666667, "tokens_rate.above_band": 0.9823321554770318, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0176678445229682 }, { "epoch": 1.861951427354069, "grad_norm": 26.311656830578926, "learning_rate": 3.6569689533732e-07, "loss": 0.1422, "step": 8740, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9728682170542635, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9486486486486486, "success_rate.epoch.env.logic": 0.8992443324937027, "success_rate.epoch.env.math": 0.9708188153310104, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8403547671840355, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.869327169409086, "success_rate.epoch.global": 0.9008663366336633, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967693836978131, "tokens_p.mean_in_band": 0.4947916666666667, "tokens_rate.above_band": 0.9940711462450593, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005928853754940711 }, { "epoch": 1.8630166169578186, "grad_norm": 49.168202966392805, "learning_rate": 3.656630936223009e-07, "loss": 0.233, "step": 8745, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9728682170542635, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9486486486486486, "success_rate.epoch.env.logic": 0.8993288590604027, "success_rate.epoch.env.math": 0.9709075119409466, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8404053198226725, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8693510493782249, "success_rate.epoch.global": 0.9009888751545118, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974609375, "tokens_p.mean_in_band": 0.69453125, "tokens_rate.above_band": 0.9696969696969697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030303030303030304 }, { "epoch": 1.8640818065615679, "grad_norm": 128.56927975701845, "learning_rate": 3.656292877800599e-07, "loss": 0.3961, "step": 8750, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9487870619946092, "success_rate.epoch.env.logic": 0.8994132439228835, "success_rate.epoch.env.math": 0.9709453599306158, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8405567858272699, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8690578370490627, "success_rate.epoch.global": 0.9009876543209877, "success_rate.window.env.agentgym:sciworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967548076923077, "tokens_p.mean_in_band": 0.7314453125, "tokens_rate.above_band": 0.9923664122137404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007633587786259542 }, { "epoch": 1.8651469961653175, "grad_norm": 78.1317822767448, "learning_rate": 3.655954778352344e-07, "loss": 0.2493, "step": 8755, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8625954198473282, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9487870619946092, "success_rate.epoch.env.logic": 0.8996655518394648, "success_rate.epoch.env.math": 0.9709579540528825, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8400758533501896, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8687953519397484, "success_rate.epoch.global": 0.9007398273736128, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982578397212544, "tokens_p.mean_in_band": 0.5829326923076923, "tokens_rate.above_band": 0.9851258581235698, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014874141876430207 }, { "epoch": 1.8662121857690668, "grad_norm": 25.245321186340654, "learning_rate": 3.655616638124649e-07, "loss": 0.1284, "step": 8760, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8625954198473282, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9487870619946092, "success_rate.epoch.env.logic": 0.899749373433584, "success_rate.epoch.env.math": 0.9709579540528825, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8403281792363522, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8685696245334139, "success_rate.epoch.global": 0.9007389162561577, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967271959459459, "tokens_p.mean_in_band": 0.7163461538461539, "tokens_rate.above_band": 0.9579288025889967, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042071197411003236 }, { "epoch": 1.8672773753728165, "grad_norm": 62.88486349099041, "learning_rate": 3.655278457363947e-07, "loss": 0.3126, "step": 8765, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8625954198473282, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9487870619946092, "success_rate.epoch.env.logic": 0.8991666666666667, "success_rate.epoch.env.math": 0.970983109571243, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8402646502835539, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8685131626969272, "success_rate.epoch.global": 0.9006150061500615, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8222222222222223, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99, "tokens_p.mean_in_band": 0.6234019886363636, "tokens_rate.above_band": 0.872093023255814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12790697674418605 }, { "epoch": 1.8683425649765657, "grad_norm": 22.074902832687243, "learning_rate": 3.6549402363167033e-07, "loss": 0.1873, "step": 8770, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8625954198473282, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9487870619946092, "success_rate.epoch.env.logic": 0.8991666666666667, "success_rate.epoch.env.math": 0.9710583153347733, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8404657016991819, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8685382769859415, "success_rate.epoch.global": 0.9007371007371008, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9938668224299065, "tokens_p.mean_in_band": 0.6162109375, "tokens_rate.above_band": 0.963963963963964, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036036036036036036 }, { "epoch": 1.8694077545803154, "grad_norm": 396.0226969200282, "learning_rate": 3.65460197522941e-07, "loss": 0.3681, "step": 8775, "success_rate.epoch.env.abd": 0.9861878453038674, "success_rate.epoch.env.agentgym:alfworld": 0.8631178707224335, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9487870619946092, "success_rate.epoch.env.logic": 0.8985868661679135, "success_rate.epoch.env.math": 0.9710708117443869, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8403519798868636, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8685273393304851, "success_rate.epoch.global": 0.9006134969325154, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8833333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0009407722513088, "tokens_p.mean_in_band": 0.45703125, "tokens_rate.above_band": 0.9744897959183674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025510204081632654 }, { "epoch": 1.8704729441840646, "grad_norm": 51.67374573714006, "learning_rate": 3.654263674348589e-07, "loss": 0.4164, "step": 8780, "success_rate.epoch.env.abd": 0.9861878453038674, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9489247311827957, "success_rate.epoch.env.logic": 0.8987551867219917, "success_rate.epoch.env.math": 0.9711206896551724, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8404522613065326, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8686159431474533, "success_rate.epoch.global": 0.9007352941176471, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969099813432836, "tokens_p.mean_in_band": 0.7328125, "tokens_rate.above_band": 0.9907578558225508, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009242144177449169 }, { "epoch": 1.8715381337878143, "grad_norm": 42.24871642244632, "learning_rate": 3.6539253339207926e-07, "loss": 0.2346, "step": 8785, "success_rate.epoch.env.abd": 0.9862258953168044, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9490616621983914, "success_rate.epoch.env.logic": 0.8987551867219917, "success_rate.epoch.env.math": 0.9711579853637538, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8406524466750314, "success_rate.epoch.env.webshop": 0.9782608695652174, "success_rate.epoch.env_macro_mean": 0.8686973571377308, "success_rate.epoch.global": 0.9008567931456548, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976765799256505, "tokens_p.mean_in_band": 0.7890625, "tokens_rate.above_band": 0.9950678175092479, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004932182490752158 }, { "epoch": 1.8726033233915635, "grad_norm": 211.27373804971876, "learning_rate": 3.6535869541926004e-07, "loss": 0.1801, "step": 8790, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.9493333333333334, "success_rate.epoch.env.logic": 0.8987551867219917, "success_rate.epoch.env.math": 0.9711703958691911, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8405388471177945, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8687617654394993, "success_rate.epoch.global": 0.9008557457212714, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996191926884996, "tokens_p.mean_in_band": 0.67578125, "tokens_rate.above_band": 0.996962794229309, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0030372057706909645 }, { "epoch": 1.8736685129953132, "grad_norm": 77.10857938654429, "learning_rate": 3.653248535410621e-07, "loss": 0.347, "step": 8795, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8641509433962264, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.949468085106383, "success_rate.epoch.env.logic": 0.8980099502487562, "success_rate.epoch.env.math": 0.9712199312714777, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8400625978090767, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8687142546352488, "success_rate.epoch.global": 0.9006105006105006, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9972933070866141, "tokens_p.mean_below_band": 4.94765117764473e-09, "tokens_p.mean_in_band": 0.5048828125, "tokens_rate.above_band": 0.9824191279887482, "tokens_rate.below_band": 0.0007032348804500703, "tokens_rate.in_band": 0.016877637130801686 }, { "epoch": 1.8747337025990625, "grad_norm": 442.0879752917922, "learning_rate": 3.652910077821492e-07, "loss": 0.3164, "step": 8800, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8641509433962264, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.949468085106383, "success_rate.epoch.env.logic": 0.8974358974358975, "success_rate.epoch.env.math": 0.9708529789969995, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8401126408010012, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686366606221381, "success_rate.epoch.global": 0.9004878048780488, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992988782051282, "tokens_p.mean_in_band": 0.5529296875, "tokens_rate.above_band": 0.968944099378882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031055900621118012 }, { "epoch": 1.8757988922028122, "grad_norm": 81.08565346136402, "learning_rate": 3.652571581671878e-07, "loss": 0.3476, "step": 8805, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8641509433962264, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.949468085106383, "success_rate.epoch.env.logic": 0.8968646864686468, "success_rate.epoch.env.math": 0.9708904109589042, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8396875, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8685494860942885, "success_rate.epoch.global": 0.9002436053593179, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0012820512820513, "tokens_p.mean_in_band": 0.5306332236842105, "tokens_rate.above_band": 0.9390048154093098, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.060995184590690206 }, { "epoch": 1.8768640818065616, "grad_norm": 148.91949073523304, "learning_rate": 3.652233047208473e-07, "loss": 0.2632, "step": 8810, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9534883720930233, "success_rate.epoch.env.ded": 0.949468085106383, "success_rate.epoch.env.logic": 0.8962962962962963, "success_rate.epoch.env.math": 0.9709277469003847, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8395254448954106, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8685329044037114, "success_rate.epoch.global": 0.9001216545012165, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971497252747252, "tokens_p.mean_in_band": 0.5689808238636364, "tokens_rate.above_band": 0.9763948497854077, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023605150214592276 }, { "epoch": 1.877929271410311, "grad_norm": 65.76655117597862, "learning_rate": 3.6518944746779984e-07, "loss": 0.2495, "step": 8815, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9496021220159151, "success_rate.epoch.env.logic": 0.8963815789473685, "success_rate.epoch.env.math": 0.9709773794280837, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8393636930754834, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686387482874213, "success_rate.epoch.global": 0.9001215066828676, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975071225071225, "tokens_p.mean_in_band": 0.5050223214285714, "tokens_rate.above_band": 0.9804469273743017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019553072625698324 }, { "epoch": 1.8789944610140605, "grad_norm": 86.95907042611266, "learning_rate": 3.651555864327204e-07, "loss": 0.1606, "step": 8820, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9496021220159151, "success_rate.epoch.env.logic": 0.896636587366694, "success_rate.epoch.env.math": 0.9710144927536232, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8392523364485981, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686551814799649, "success_rate.epoch.global": 0.9001213592233009, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9885752688172043, "tokens_p.mean_in_band": 0.37109375, "tokens_rate.above_band": 0.9789473684210527, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021052631578947368 }, { "epoch": 1.88005965061781, "grad_norm": 62.78270060399613, "learning_rate": 3.6512172164028663e-07, "loss": 0.2073, "step": 8825, "success_rate.epoch.env.abd": 0.9864130434782609, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9496021220159151, "success_rate.epoch.env.logic": 0.8967213114754098, "success_rate.epoch.env.math": 0.9710515112813963, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8394523957685127, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686911858090972, "success_rate.epoch.global": 0.9002424242424243, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965490797546013, "tokens_p.mean_in_band": 0.79296875, "tokens_rate.above_band": 0.9760479041916168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023952095808383235 }, { "epoch": 1.8811248402215595, "grad_norm": 276.27209638409545, "learning_rate": 3.6508785311517884e-07, "loss": 0.4515, "step": 8830, "success_rate.epoch.env.abd": 0.986449864498645, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9496021220159151, "success_rate.epoch.env.logic": 0.8967213114754098, "success_rate.epoch.env.math": 0.971063829787234, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8386095592799503, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686190315397936, "success_rate.epoch.global": 0.8998789346246974, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.990234375, "tokens_p.mean_in_band": 0.5220947265625, "tokens_rate.above_band": 0.8888888888888888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1111111111111111 }, { "epoch": 1.882190029825309, "grad_norm": 108.39685236365882, "learning_rate": 3.6505398088208035e-07, "loss": 0.2833, "step": 8835, "success_rate.epoch.env.abd": 0.986449864498645, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9496021220159151, "success_rate.epoch.env.logic": 0.8968903436988543, "success_rate.epoch.env.math": 0.9711007224819379, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8388596219398823, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686604849559827, "success_rate.epoch.global": 0.9, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9927325581395349, "tokens_p.mean_in_band": 0.8736979166666666, "tokens_rate.above_band": 0.9662921348314607, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033707865168539325 }, { "epoch": 1.8832552194290584, "grad_norm": 207.93342982331916, "learning_rate": 3.6502010496567693e-07, "loss": 0.2613, "step": 8840, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9497354497354498, "success_rate.epoch.env.logic": 0.8969746524938675, "success_rate.epoch.env.math": 0.9711252653927813, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8391089108910891, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8687084931709318, "success_rate.epoch.global": 0.9001207729468599, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997749162479062, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9983277591973244, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016722408026755853 }, { "epoch": 1.8843204090328078, "grad_norm": 59.02728698427287, "learning_rate": 3.6498622539065705e-07, "loss": 0.1715, "step": 8845, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9498680738786279, "success_rate.epoch.env.logic": 0.8969746524938675, "success_rate.epoch.env.math": 0.9711742263671047, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8389987639060569, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8687256229876226, "success_rate.epoch.global": 0.9001206272617611, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993792808219178, "tokens_p.mean_in_band": 0.5442708333333334, "tokens_rate.above_band": 0.9918478260869565, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008152173913043478 }, { "epoch": 1.8853855986365573, "grad_norm": 288.46010219787297, "learning_rate": 3.6495234218171193e-07, "loss": 0.3287, "step": 8850, "success_rate.epoch.env.abd": 0.9865591397849462, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9498680738786279, "success_rate.epoch.env.logic": 0.8969746524938675, "success_rate.epoch.env.math": 0.9711864406779661, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8389882788402221, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8687429396001639, "success_rate.epoch.global": 0.9001204819277109, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975703753351206, "tokens_p.mean_in_band": 0.5817057291666666, "tokens_rate.above_band": 0.9688311688311688, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03116883116883117 }, { "epoch": 1.8864507882403068, "grad_norm": 131.39455046498674, "learning_rate": 3.6491845536353545e-07, "loss": 0.375, "step": 8855, "success_rate.epoch.env.abd": 0.9865591397849462, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9498680738786279, "success_rate.epoch.env.logic": 0.8964110929853181, "success_rate.epoch.env.math": 0.9712473572938689, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8390875462392109, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8687062691007404, "success_rate.epoch.global": 0.9001203369434416, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992722602739726, "tokens_p.mean_in_band": 0.6607142857142857, "tokens_rate.above_band": 0.954248366013072, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0457516339869281 }, { "epoch": 1.8875159778440562, "grad_norm": 33.0871024813285, "learning_rate": 3.64884564960824e-07, "loss": 0.3077, "step": 8860, "success_rate.epoch.env.abd": 0.9865951742627346, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9498680738786279, "success_rate.epoch.env.logic": 0.8958502847843776, "success_rate.epoch.env.math": 0.9712837837837838, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8392362180474284, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686753895166478, "success_rate.epoch.global": 0.9001201923076924, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950181159420289, "tokens_p.mean_in_band": 0.66064453125, "tokens_rate.above_band": 0.9452054794520548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0547945205479452 }, { "epoch": 1.8885811674478057, "grad_norm": 73.15717917151254, "learning_rate": 3.648506709982767e-07, "loss": 0.2875, "step": 8865, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9498680738786279, "success_rate.epoch.env.logic": 0.8960194963444355, "success_rate.epoch.env.math": 0.970873786407767, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8391758917589176, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686545149984171, "success_rate.epoch.global": 0.9, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994745575221239, "tokens_p.mean_in_band": 0.6860795454545454, "tokens_rate.above_band": 0.9112903225806451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08870967741935484 }, { "epoch": 1.8896463570515551, "grad_norm": 112.30454056775271, "learning_rate": 3.6481677350059525e-07, "loss": 0.4443, "step": 8870, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8646616541353384, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8960194963444355, "success_rate.epoch.env.math": 0.9709228824273072, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8394227817009517, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8686934160968668, "success_rate.epoch.global": 0.9001199040767386, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9803921568627451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0196078431372549 }, { "epoch": 1.8907115466553046, "grad_norm": 108.87575965681468, "learning_rate": 3.647828724924839e-07, "loss": 0.1819, "step": 8875, "success_rate.epoch.env.abd": 0.9867021276595744, "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8960194963444355, "success_rate.epoch.env.math": 0.9709595959595959, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8396197485433916, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8687744385701105, "success_rate.epoch.global": 0.9002395209580838, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980755131964809, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9970760233918129, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0029239766081871343 }, { "epoch": 1.891776736259054, "grad_norm": 27.242299934578543, "learning_rate": 3.6474896799864945e-07, "loss": 0.2229, "step": 8880, "success_rate.epoch.env.abd": 0.9867021276595744, "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9501312335958005, "success_rate.epoch.env.logic": 0.8960194963444355, "success_rate.epoch.env.math": 0.9710084033613445, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8392529087568892, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8687574568620238, "success_rate.epoch.global": 0.9001196172248804, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965128755364807, "tokens_p.mean_in_band": 0.5091145833333334, "tokens_rate.above_band": 0.9395161290322581, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06048387096774194 }, { "epoch": 1.8928419258628035, "grad_norm": 75.14965641933597, "learning_rate": 3.647150600438012e-07, "loss": 0.3553, "step": 8885, "success_rate.epoch.env.abd": 0.986737400530504, "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9502617801047121, "success_rate.epoch.env.logic": 0.8961038961038961, "success_rate.epoch.env.math": 0.9710205795884083, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8391929073677774, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.868816152897351, "success_rate.epoch.global": 0.9001194743130227, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9666666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977494855967078, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9858012170385395, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014198782961460446 }, { "epoch": 1.893907115466553, "grad_norm": 55.776692901584276, "learning_rate": 3.6468114865265116e-07, "loss": 0.3356, "step": 8890, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9502617801047121, "success_rate.epoch.env.logic": 0.8962722852512156, "success_rate.epoch.env.math": 0.9710570469798657, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8393402565668907, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8688545340855497, "success_rate.epoch.global": 0.9002386634844869, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9958333333333333, "tokens_p.mean_in_band": 0.6627604166666666, "tokens_rate.above_band": 0.9803921568627451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0196078431372549 }, { "epoch": 1.8949723050703025, "grad_norm": 296.52137807605504, "learning_rate": 3.646472338499136e-07, "loss": 0.1957, "step": 8895, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8656716417910447, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9502617801047121, "success_rate.epoch.env.logic": 0.8963562753036437, "success_rate.epoch.env.math": 0.971081307627829, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8396341463414634, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8689368288982919, "success_rate.epoch.global": 0.900357568533969, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977078239608802, "tokens_p.mean_in_band": 0.78828125, "tokens_rate.above_band": 0.9879227053140096, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012077294685990338 }, { "epoch": 1.8960374946740521, "grad_norm": 100.64244478297722, "learning_rate": 3.6461331566030537e-07, "loss": 0.2256, "step": 8900, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8656716417910447, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9503916449086162, "success_rate.epoch.env.logic": 0.8964401294498382, "success_rate.epoch.env.math": 0.9711297071129708, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8398294762484775, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8689784151112242, "success_rate.epoch.global": 0.9004761904761904, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970529359430605, "tokens_p.mean_in_band": 0.779296875, "tokens_rate.above_band": 0.9964539007092199, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0035460992907801418 }, { "epoch": 1.8971026842778014, "grad_norm": 0.0, "learning_rate": 3.6457939410854587e-07, "loss": 0.1274, "step": 8905, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9505208333333334, "success_rate.epoch.env.logic": 0.8964401294498382, "success_rate.epoch.env.math": 0.9711417816813049, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8398176291793313, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8690806370020904, "success_rate.epoch.global": 0.9004756242568371, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978402679830748, "tokens_p.mean_in_band": 0.3984375, "tokens_rate.above_band": 0.9957865168539326, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004213483146067416 }, { "epoch": 1.898167873881551, "grad_norm": 87.1933198476706, "learning_rate": 3.6454546921935686e-07, "loss": 0.2482, "step": 8910, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9505208333333334, "success_rate.epoch.env.logic": 0.8964401294498382, "success_rate.epoch.env.math": 0.9707846410684474, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8397572078907436, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8690426768292315, "success_rate.epoch.global": 0.9003562945368171, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.993421052631579, "tokens_p.mean_in_band": 0.471435546875, "tokens_rate.above_band": 0.9344262295081968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06557377049180328 }, { "epoch": 1.8992330634853003, "grad_norm": 67.01756454499545, "learning_rate": 3.645115410174625e-07, "loss": 0.2604, "step": 8915, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8671586715867159, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.9505208333333334, "success_rate.epoch.env.logic": 0.8964401294498382, "success_rate.epoch.env.math": 0.9708211754897874, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.8399029714978775, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8689021145780077, "success_rate.epoch.global": 0.900355871886121, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984793187347932, "tokens_p.mean_in_band": 0.7333333333333333, "tokens_rate.above_band": 0.9647887323943662, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035211267605633804 }, { "epoch": 1.90029825308905, "grad_norm": 124.8867859723598, "learning_rate": 3.6447760952758945e-07, "loss": 0.4629, "step": 8920, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.948051948051948, "success_rate.epoch.env.logic": 0.8965238480194018, "success_rate.epoch.env.math": 0.9708454810495627, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.839794064203513, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8687515049521056, "success_rate.epoch.global": 0.9002369668246446, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9906776511397423, "tokens_p.mean_in_band": 0.5905602229899497, "tokens_rate.above_band": 0.8352649006622517, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16473509933774835 }, { "epoch": 1.9013634426927992, "grad_norm": 534.2548537255205, "learning_rate": 3.6444367477446683e-07, "loss": 0.2376, "step": 8925, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9699248120300752, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.948051948051948, "success_rate.epoch.env.logic": 0.8966908797417272, "success_rate.epoch.env.math": 0.970869746150645, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.8396854204476709, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.868779731806712, "success_rate.epoch.global": 0.9002366863905326, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980630165289256, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9603174603174603, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03968253968253968 }, { "epoch": 1.902428632296549, "grad_norm": 138.79055943098803, "learning_rate": 3.6440973678282596e-07, "loss": 0.2636, "step": 8930, "success_rate.epoch.env.abd": 0.9868766404199475, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.948051948051948, "success_rate.epoch.env.logic": 0.8968573730862208, "success_rate.epoch.env.math": 0.9708939708939709, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.839782345828295, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8685442195725792, "success_rate.epoch.global": 0.9002364066193853, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982461734693877, "tokens_p.mean_in_band": 0.6927083333333334, "tokens_rate.above_band": 0.9423076923076923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.057692307692307696 }, { "epoch": 1.9034938219002981, "grad_norm": 123.68342906320828, "learning_rate": 3.643757955774006e-07, "loss": 0.1959, "step": 8935, "success_rate.epoch.env.abd": 0.9868766404199475, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.9481865284974094, "success_rate.epoch.env.logic": 0.8970233306516492, "success_rate.epoch.env.math": 0.9709302325581395, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8396739130434783, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8685649801989649, "success_rate.epoch.global": 0.9002361275088547, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950564971751412, "tokens_p.mean_in_band": 0.7360026041666666, "tokens_rate.above_band": 0.9365079365079365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06349206349206349 }, { "epoch": 1.9045590115040478, "grad_norm": 218.0056230192956, "learning_rate": 3.643418511829268e-07, "loss": 0.3123, "step": 8940, "success_rate.epoch.env.abd": 0.9869109947643979, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.9457364341085271, "success_rate.epoch.env.logic": 0.8971887550200803, "success_rate.epoch.env.math": 0.970954356846473, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8395173453996984, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8683584538612865, "success_rate.epoch.global": 0.9001179245283019, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955074186113512, "tokens_p.mean_below_band": 1.2759119272232056e-07, "tokens_p.mean_in_band": 0.5483903556034483, "tokens_rate.above_band": 0.8816357632715266, "tokens_rate.below_band": 0.000508001016002032, "tokens_rate.in_band": 0.11785623571247142 }, { "epoch": 1.905624201107797, "grad_norm": 12.2928777285083, "learning_rate": 3.643079036241432e-07, "loss": 0.6015, "step": 8945, "success_rate.epoch.env.abd": 0.9869451697127938, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.9458762886597938, "success_rate.epoch.env.logic": 0.8972712680577849, "success_rate.epoch.env.math": 0.9710024855012428, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8396624472573839, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8683993423203611, "success_rate.epoch.global": 0.9002355712603063, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0012953367875648, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9994821336095288, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0005178663904712584 }, { "epoch": 1.9066893907115467, "grad_norm": 38.41411912337251, "learning_rate": 3.6427395292579024e-07, "loss": 0.1982, "step": 8950, "success_rate.epoch.env.abd": 0.9869791666666666, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.9460154241645244, "success_rate.epoch.env.logic": 0.8973536487570168, "success_rate.epoch.env.math": 0.9710504549214226, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8395061728395061, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8684127248803737, "success_rate.epoch.global": 0.900235294117647, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980005924170616, "tokens_p.mean_in_band": 0.62890625, "tokens_rate.above_band": 0.9813953488372092, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018604651162790697 }, { "epoch": 1.907754580315296, "grad_norm": 51.110429199823585, "learning_rate": 3.6423999911261116e-07, "loss": 0.3135, "step": 8955, "success_rate.epoch.env.abd": 0.9870466321243523, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.9460154241645244, "success_rate.epoch.env.logic": 0.8976, "success_rate.epoch.env.math": 0.9710863279636514, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8396027685826061, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8684532962881917, "success_rate.epoch.global": 0.9003525264394829, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995361328125, "tokens_p.mean_in_band": 0.5546875, "tokens_rate.above_band": 0.9922480620155039, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007751937984496124 }, { "epoch": 1.9088197699190457, "grad_norm": 99.64006925845497, "learning_rate": 3.642060422093512e-07, "loss": 0.1851, "step": 8960, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.9460154241645244, "success_rate.epoch.env.logic": 0.8976, "success_rate.epoch.env.math": 0.9711101939744119, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8397956116621581, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8685586072121495, "success_rate.epoch.global": 0.9004694835680751, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984038978494624, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.909884959522795, "grad_norm": 42.651445610609706, "learning_rate": 3.6417208224075794e-07, "loss": 0.1813, "step": 8965, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9555555555555556, "success_rate.epoch.env.ded": 0.9462915601023018, "success_rate.epoch.env.logic": 0.8976818545163869, "success_rate.epoch.env.math": 0.9707216494845361, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8399879915941159, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686170705258323, "success_rate.epoch.global": 0.9004689331770223, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984406822810591, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9939271255060729, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006072874493927126 }, { "epoch": 1.9109501491265446, "grad_norm": 68.41679309492326, "learning_rate": 3.641381192315811e-07, "loss": 0.5337, "step": 8970, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9462915601023018, "success_rate.epoch.env.logic": 0.8977635782747604, "success_rate.epoch.env.math": 0.9707457766790276, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8399760263709919, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8687134404626158, "success_rate.epoch.global": 0.9004683840749415, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9923780487804879, "tokens_p.mean_in_band": 0.54765625, "tokens_rate.above_band": 0.8913043478260869, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10869565217391304 }, { "epoch": 1.9120153387302938, "grad_norm": 102.54793159479081, "learning_rate": 3.6410415320657266e-07, "loss": 0.3892, "step": 8975, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9462915601023018, "success_rate.epoch.env.logic": 0.897047086991221, "success_rate.epoch.env.math": 0.9703947368421053, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8398683029033224, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686065991364221, "success_rate.epoch.global": 0.9002339181286549, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9908002336448598, "tokens_p.mean_in_band": 0.623046875, "tokens_rate.above_band": 0.9224137931034483, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07758620689655173 }, { "epoch": 1.9130805283340435, "grad_norm": 79.87936609934741, "learning_rate": 3.640701841904869e-07, "loss": 0.2685, "step": 8980, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8971291866028708, "success_rate.epoch.env.math": 0.9704433497536946, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8397608370702541, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686211680470076, "success_rate.epoch.global": 0.900233644859813, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966482649842271, "tokens_p.mean_in_band": 0.7274305555555556, "tokens_rate.above_band": 0.9723926380368099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027607361963190184 }, { "epoch": 1.914145717937793, "grad_norm": 65.72260002128823, "learning_rate": 3.640362122080802e-07, "loss": 0.301, "step": 8985, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9704797047970479, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.896414342629482, "success_rate.epoch.env.math": 0.970467596390484, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8394509101760669, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8685401507920886, "success_rate.epoch.global": 0.9, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9923245614035088, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9144385026737968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0855614973262032 }, { "epoch": 1.9152109075415424, "grad_norm": 80.78397466931366, "learning_rate": 3.6400223728411094e-07, "loss": 0.1647, "step": 8990, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9704797047970479, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8966613672496025, "success_rate.epoch.env.math": 0.9705159705159705, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8395466746197435, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686191447551852, "success_rate.epoch.global": 0.9001165501165501, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966902709359606, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.916276097145292, "grad_norm": 63.13953004143602, "learning_rate": 3.6396825944334e-07, "loss": 0.2674, "step": 8995, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9704797047970479, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8968253968253969, "success_rate.epoch.env.math": 0.9705641864268193, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8397378611855824, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686558203963198, "success_rate.epoch.global": 0.9002328288707799, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9934413580246914, "tokens_p.mean_in_band": 0.809375, "tokens_rate.above_band": 0.9418604651162791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05813953488372093 }, { "epoch": 1.9173412867490414, "grad_norm": 41.61706506620451, "learning_rate": 3.6393427871053005e-07, "loss": 0.3223, "step": 9000, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9706959706959707, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8968253968253969, "success_rate.epoch.env.math": 0.9706242350061199, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8398332837153915, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686926418245666, "success_rate.epoch.global": 0.9003488372093024, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989329268292683, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9951456310679612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0048543689320388345 }, { "epoch": 1.9184064763527908, "grad_norm": 81.30143203275551, "learning_rate": 3.6390029511044604e-07, "loss": 0.4031, "step": 9005, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9706959706959707, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9574468085106383, "success_rate.epoch.env.ded": 0.9440203562340967, "success_rate.epoch.env.logic": 0.8962787015043547, "success_rate.epoch.env.math": 0.9706242350061199, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8397740784780023, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8685027285179573, "success_rate.epoch.global": 0.9001161440185831, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9950268058690744, "tokens_p.mean_in_band": 0.7016447368421053, "tokens_rate.above_band": 0.9031600407747197, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09683995922528033 }, { "epoch": 1.9194716659565403, "grad_norm": 169.86596290553987, "learning_rate": 3.63866308667855e-07, "loss": 0.2962, "step": 9010, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9706959706959707, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9574468085106383, "success_rate.epoch.env.ded": 0.9441624365482234, "success_rate.epoch.env.logic": 0.8964426877470356, "success_rate.epoch.env.math": 0.9706481858948227, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8397150489759573, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8685273637855452, "success_rate.epoch.global": 0.9001160092807424, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974759615384615, "tokens_p.mean_in_band": 0.5205078125, "tokens_rate.above_band": 0.9701492537313433, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029850746268656716 }, { "epoch": 1.9205368555602897, "grad_norm": 142.77595837046383, "learning_rate": 3.6383231940752596e-07, "loss": 0.4036, "step": 9015, "success_rate.epoch.env.abd": 0.987146529562982, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9706959706959707, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9574468085106383, "success_rate.epoch.env.ded": 0.9441624365482234, "success_rate.epoch.env.logic": 0.8966876971608833, "success_rate.epoch.env.math": 0.9706720977596741, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8396086569819152, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8685451507660709, "success_rate.epoch.global": 0.9001158748551564, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954268292682927, "tokens_p.mean_in_band": 0.6497395833333334, "tokens_rate.above_band": 0.9447004608294931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055299539170506916 }, { "epoch": 1.9216020451640392, "grad_norm": 90.08525616667941, "learning_rate": 3.637983273542301e-07, "loss": 0.1618, "step": 9020, "success_rate.epoch.env.abd": 0.9872122762148338, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9574468085106383, "success_rate.epoch.env.ded": 0.9441624365482234, "success_rate.epoch.env.logic": 0.8968503937007875, "success_rate.epoch.env.math": 0.9706720977596741, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8397986378442405, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686025638812709, "success_rate.epoch.global": 0.9002314814814815, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988095238095238, "tokens_p.mean_in_band": 0.8583984375, "tokens_rate.above_band": 0.9924385633270322, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007561436672967864 }, { "epoch": 1.9226672347677887, "grad_norm": 168.30605562541513, "learning_rate": 3.637643325327406e-07, "loss": 0.2117, "step": 9025, "success_rate.epoch.env.abd": 0.9872122762148338, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9574468085106383, "success_rate.epoch.env.ded": 0.9441624365482234, "success_rate.epoch.env.logic": 0.8969315499606609, "success_rate.epoch.env.math": 0.9707317073170731, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8396923986986099, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686057026696019, "success_rate.epoch.global": 0.9002312138728323, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9909722222222223, "tokens_p.mean_in_band": 0.5520833333333334, "tokens_rate.above_band": 0.8571428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14285714285714285 }, { "epoch": 1.9237324243715381, "grad_norm": 90.51657523408925, "learning_rate": 3.6373033496783253e-07, "loss": 0.2778, "step": 9030, "success_rate.epoch.env.abd": 0.9872448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9574468085106383, "success_rate.epoch.env.ded": 0.9443037974683545, "success_rate.epoch.env.logic": 0.8969315499606609, "success_rate.epoch.env.math": 0.9707436001625356, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8399763802775317, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686915358868077, "success_rate.epoch.global": 0.9003464203233257, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980491329479769, "tokens_p.mean_in_band": 0.73193359375, "tokens_rate.above_band": 0.9908361970217641, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009163802978235968 }, { "epoch": 1.9247976139752876, "grad_norm": 50.94793959134441, "learning_rate": 3.636963346842832e-07, "loss": 0.2667, "step": 9035, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9574468085106383, "success_rate.epoch.env.ded": 0.9443037974683545, "success_rate.epoch.env.logic": 0.8970125786163522, "success_rate.epoch.env.math": 0.9703613479496549, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.839622641509434, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8683607088098267, "success_rate.epoch.global": 0.9, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.62, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.995045731707317, "tokens_p.mean_in_band": 0.5623337765957447, "tokens_rate.above_band": 0.7772511848341233, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.22274881516587677 }, { "epoch": 1.925862803579037, "grad_norm": 55.74473206296719, "learning_rate": 3.6366233170687165e-07, "loss": 0.2714, "step": 9040, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8700361010830325, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9574468085106383, "success_rate.epoch.env.ded": 0.9443037974683545, "success_rate.epoch.env.logic": 0.8971742543171115, "success_rate.epoch.env.math": 0.9703733766233766, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8395643214601118, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8684235878881374, "success_rate.epoch.global": 0.9, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971942724458205, "tokens_p.mean_in_band": 0.3984375, "tokens_rate.above_band": 0.9877675840978594, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012232415902140673 }, { "epoch": 1.9269279931827865, "grad_norm": 133.49173403920906, "learning_rate": 3.636283260603791e-07, "loss": 0.3041, "step": 9045, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8971742543171115, "success_rate.epoch.env.math": 0.9703853955375253, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8398001175778954, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686241904047892, "success_rate.epoch.global": 0.9001150747986191, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995659722222222, "tokens_p.mean_in_band": 0.658203125, "tokens_rate.above_band": 0.9969230769230769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003076923076923077 }, { "epoch": 1.927993182786536, "grad_norm": 612.7860374754193, "learning_rate": 3.635943177695886e-07, "loss": 0.4929, "step": 9050, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8714285714285714, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8974158183241974, "success_rate.epoch.env.math": 0.9704453441295546, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8398471936526594, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686977738745135, "success_rate.epoch.global": 0.9002298850574713, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979108635097493, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.9290583723902854, "grad_norm": 224.17483075811546, "learning_rate": 3.635603068592851e-07, "loss": 0.2035, "step": 9055, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8718861209964412, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8974960876369327, "success_rate.epoch.env.math": 0.9704811969268096, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8400821596244131, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8687712863881146, "success_rate.epoch.global": 0.9003444316877153, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965441176470589, "tokens_p.mean_in_band": 0.6953125, "tokens_rate.above_band": 0.9976525821596244, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002347417840375587 }, { "epoch": 1.930123561994035, "grad_norm": 126.74713334968651, "learning_rate": 3.635262933542556e-07, "loss": 0.2898, "step": 9060, "success_rate.epoch.env.abd": 0.9873096446700508, "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9445843828715366, "success_rate.epoch.env.logic": 0.89765625, "success_rate.epoch.env.math": 0.9705169628432956, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.839882697947214, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8688279228013333, "success_rate.epoch.global": 0.9003440366972477, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987012987012988, "tokens_p.mean_in_band": 0.439453125, "tokens_rate.above_band": 0.9987029831387808, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012970168612191958 }, { "epoch": 1.9311887515977844, "grad_norm": 62.90172133206027, "learning_rate": 3.634922772792888e-07, "loss": 0.1731, "step": 9065, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9447236180904522, "success_rate.epoch.env.logic": 0.8978159126365055, "success_rate.epoch.env.math": 0.9705407586763519, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8397773872290568, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8688506055697122, "success_rate.epoch.global": 0.9003436426116839, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988517060367454, "tokens_p.mean_in_band": 0.5642361111111112, "tokens_rate.above_band": 0.9769230769230769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023076923076923078 }, { "epoch": 1.9322539412015338, "grad_norm": 127.52265247480662, "learning_rate": 3.6345825865917547e-07, "loss": 0.2649, "step": 9070, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9447236180904522, "success_rate.epoch.env.logic": 0.8979750778816199, "success_rate.epoch.env.math": 0.9705763804917372, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8399180567749488, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.86856242276276, "success_rate.epoch.global": 0.9003318457489415, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9972426470588235, "tokens_p.mean_in_band": 0.6611328125, "tokens_rate.above_band": 0.9855072463768116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014492753623188406 }, { "epoch": 1.9333191308052835, "grad_norm": 285.3429191357568, "learning_rate": 3.6342423751870807e-07, "loss": 0.243, "step": 9075, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9447236180904522, "success_rate.epoch.env.logic": 0.8980544747081712, "success_rate.epoch.env.math": 0.9705763804917372, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8402453271028038, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686510259506636, "success_rate.epoch.global": 0.9004457652303121, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980596405228758, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9902912621359223, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009708737864077669 }, { "epoch": 1.9343843204090327, "grad_norm": 77.8749109391679, "learning_rate": 3.633902138826809e-07, "loss": 0.2204, "step": 9080, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9447236180904522, "success_rate.epoch.env.logic": 0.8982919254658385, "success_rate.epoch.env.math": 0.9702093397745571, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8400933761307265, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8686529295331351, "success_rate.epoch.global": 0.900331088023747, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961538461538462, "tokens_p.mean_in_band": 0.6150390625, "tokens_rate.above_band": 0.8666666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13333333333333333 }, { "epoch": 1.9354495100127824, "grad_norm": 398.3227759738249, "learning_rate": 3.633561877758903e-07, "loss": 0.3421, "step": 9085, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9448621553884712, "success_rate.epoch.env.logic": 0.8985282726568552, "success_rate.epoch.env.math": 0.9702452754322477, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8402332361516035, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8687029913665536, "success_rate.epoch.global": 0.9004447485460144, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986361480075902, "tokens_p.mean_in_band": 0.8372395833333334, "tokens_rate.above_band": 0.9943396226415094, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005660377358490566 }, { "epoch": 1.9365146996165317, "grad_norm": 279.4423630613026, "learning_rate": 3.6332215922313415e-07, "loss": 0.1831, "step": 9090, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9448621553884712, "success_rate.epoch.env.logic": 0.8986068111455109, "success_rate.epoch.env.math": 0.970281124497992, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.840221187427241, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.868712294896557, "success_rate.epoch.global": 0.9004442419409956, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9905024509803921, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9622641509433962, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03773584905660377 }, { "epoch": 1.9375798892202813, "grad_norm": 50.96224276554615, "learning_rate": 3.6328812824921224e-07, "loss": 0.2398, "step": 9095, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9448621553884712, "success_rate.epoch.env.logic": 0.8987635239567233, "success_rate.epoch.env.math": 0.9699278267842822, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8403605699331201, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8687843983274148, "success_rate.epoch.global": 0.9004437364887928, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996822033898305, "tokens_p.mean_in_band": 0.4479166666666667, "tokens_rate.above_band": 0.9833333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016666666666666666 }, { "epoch": 1.9386450788240306, "grad_norm": 109.92298152517856, "learning_rate": 3.6325409487892607e-07, "loss": 0.2635, "step": 9100, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9448621553884712, "success_rate.epoch.env.logic": 0.8987635239567233, "success_rate.epoch.env.math": 0.9699759807846277, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8405460354342144, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8688569049537626, "success_rate.epoch.global": 0.9005568814638027, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967026378896883, "tokens_p.mean_in_band": 0.765625, "tokens_rate.above_band": 0.9952267303102625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00477326968973747 }, { "epoch": 1.9397102684277803, "grad_norm": 124.21391618067524, "learning_rate": 3.6322005913707894e-07, "loss": 0.1727, "step": 9105, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.945, "success_rate.epoch.env.logic": 0.8989197530864198, "success_rate.epoch.env.math": 0.9699879951980792, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8404409631563678, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8688809763434574, "success_rate.epoch.global": 0.9005562492904984, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973104508196722, "tokens_p.mean_in_band": 0.57421875, "tokens_rate.above_band": 0.991869918699187, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008130081300813009 }, { "epoch": 1.9407754580315295, "grad_norm": 62.23115194154413, "learning_rate": 3.6318602104847583e-07, "loss": 0.2065, "step": 9110, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9451371571072319, "success_rate.epoch.env.logic": 0.8984615384615384, "success_rate.epoch.env.math": 0.9700119952019193, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8402898550724638, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8688402340163925, "success_rate.epoch.global": 0.9004422270098651, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976777059773829, "tokens_p.mean_in_band": 0.6293402777777778, "tokens_rate.above_band": 0.9717425431711146, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0282574568288854 }, { "epoch": 1.9418406476352792, "grad_norm": 80.02891353760823, "learning_rate": 3.631519806379235e-07, "loss": 0.2089, "step": 9115, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9451371571072319, "success_rate.epoch.env.logic": 0.8987730061349694, "success_rate.epoch.env.math": 0.9700479233226837, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8404286128004633, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8688844297911374, "success_rate.epoch.global": 0.9005549892400045, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9913306451612903, "tokens_p.mean_in_band": 0.7369791666666666, "tokens_rate.above_band": 0.9281437125748503, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0718562874251497 }, { "epoch": 1.9429058372390284, "grad_norm": 72.53454885443212, "learning_rate": 3.631179379302303e-07, "loss": 0.248, "step": 9120, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9454094292803971, "success_rate.epoch.env.logic": 0.8988505747126436, "success_rate.epoch.env.math": 0.9700837654567211, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8405671296296297, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8689725183021048, "success_rate.epoch.global": 0.9006674963231135, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993259803921568, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9980430528375733, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0019569471624266144 }, { "epoch": 1.9439710268427781, "grad_norm": 43.46356659659513, "learning_rate": 3.630838929502064e-07, "loss": 0.1965, "step": 9125, "success_rate.epoch.env.abd": 0.9874371859296482, "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9454094292803971, "success_rate.epoch.env.logic": 0.8990053557765876, "success_rate.epoch.env.math": 0.9701433121019108, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8403701561596298, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8689769727209294, "success_rate.epoch.global": 0.9006667420047463, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967105263157895, "tokens_p.mean_in_band": 0.4713541666666667, "tokens_rate.above_band": 0.8636363636363636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13636363636363635 }, { "epoch": 1.9450362164465274, "grad_norm": 120.39441897126666, "learning_rate": 3.6304984572266345e-07, "loss": 0.1859, "step": 9130, "success_rate.epoch.env.abd": 0.9874371859296482, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9454094292803971, "success_rate.epoch.env.logic": 0.8990825688073395, "success_rate.epoch.env.math": 0.9701670644391408, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8406466512702079, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8690514385462129, "success_rate.epoch.global": 0.9007788689468337, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947630494505495, "tokens_p.mean_in_band": 0.8138020833333334, "tokens_rate.above_band": 0.9918256130790191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008174386920980926 }, { "epoch": 1.946101406050277, "grad_norm": 0.0, "learning_rate": 3.630157962724148e-07, "loss": 0.3496, "step": 9135, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9455445544554455, "success_rate.epoch.env.logic": 0.8991596638655462, "success_rate.epoch.env.math": 0.9702144559173947, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8407845399480819, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8690904372861159, "success_rate.epoch.global": 0.9008907430375465, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0002088903743316, "tokens_p.mean_in_band": 0.876953125, "tokens_rate.above_band": 0.9973333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0026666666666666666 }, { "epoch": 1.9471665956540263, "grad_norm": 276.0311235604553, "learning_rate": 3.629817446242756e-07, "loss": 0.3399, "step": 9140, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.9678571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.945679012345679, "success_rate.epoch.env.logic": 0.8993135011441648, "success_rate.epoch.env.math": 0.9702499008330028, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8405880657249928, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8691511321318132, "success_rate.epoch.global": 0.9008897398355671, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994762569832403, "tokens_p.mean_in_band": 0.6276041666666666, "tokens_rate.above_band": 0.9916897506925207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008310249307479225 }, { "epoch": 1.948231785257776, "grad_norm": 216.7601198118111, "learning_rate": 3.629476908030623e-07, "loss": 0.3421, "step": 9145, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.9678571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.8987052551408987, "success_rate.epoch.env.math": 0.9702852614896988, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8407258064516129, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8691358402209328, "success_rate.epoch.global": 0.9008887388907638, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979811946902655, "tokens_p.mean_in_band": 0.65, "tokens_rate.above_band": 0.9868995633187773, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013100436681222707 }, { "epoch": 1.9492969748615252, "grad_norm": 118.96363195648838, "learning_rate": 3.629136348335931e-07, "loss": 0.2794, "step": 9150, "success_rate.epoch.env.abd": 0.9875311720698254, "success_rate.epoch.env.agentgym:alfworld": 0.8745644599303136, "success_rate.epoch.env.agentgym:sciworld": 0.9679715302491103, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.898936170212766, "success_rate.epoch.env.math": 0.9702970297029703, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8405296488198043, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8691960219700569, "success_rate.epoch.global": 0.9008877401955276, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998989218328841, "tokens_p.mean_in_band": 0.576171875, "tokens_rate.above_band": 0.9840848806366048, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015915119363395226 }, { "epoch": 1.9503621644652749, "grad_norm": 83.31949908768537, "learning_rate": 3.628795767406878e-07, "loss": 0.209, "step": 9155, "success_rate.epoch.env.abd": 0.9875311720698254, "success_rate.epoch.env.agentgym:alfworld": 0.8745644599303136, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.898936170212766, "success_rate.epoch.env.math": 0.970344009489917, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8407588387467663, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.869231453410764, "success_rate.epoch.global": 0.9009989897856101, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964015151515152, "tokens_p.mean_in_band": 0.8268229166666666, "tokens_rate.above_band": 0.9821428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017857142857142856 }, { "epoch": 1.9514273540690241, "grad_norm": 199.44421113391073, "learning_rate": 3.6284551654916765e-07, "loss": 0.1286, "step": 9160, "success_rate.epoch.env.abd": 0.9875311720698254, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9830508474576272, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.946078431372549, "success_rate.epoch.env.logic": 0.8990129081245254, "success_rate.epoch.env.math": 0.9703791469194313, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8408960367604825, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8693323011943324, "success_rate.epoch.global": 0.9011099899091827, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981997784342689, "tokens_p.mean_in_band": 0.8313802083333334, "tokens_rate.above_band": 0.9955882352941177, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004411764705882353 }, { "epoch": 1.9524925436727738, "grad_norm": 76.79437987446578, "learning_rate": 3.628114542838555e-07, "loss": 0.3887, "step": 9165, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, "success_rate.epoch.env.agentgym:sciworld": 0.9681978798586572, "success_rate.epoch.env.agentgym:textcraft": 0.9833333333333333, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9463414634146341, "success_rate.epoch.env.logic": 0.8990129081245254, "success_rate.epoch.env.math": 0.9703908409001184, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8409873708381171, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8694436523205368, "success_rate.epoch.global": 0.9012096774193549, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972426470588235, "tokens_p.mean_in_band": 0.8834635416666666, "tokens_rate.above_band": 0.9941520467836257, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005847953216374269 }, { "epoch": 1.9535577332765233, "grad_norm": 332.6171389137624, "learning_rate": 3.627773899695756e-07, "loss": 0.2785, "step": 9170, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9463414634146341, "success_rate.epoch.env.logic": 0.8990895295902883, "success_rate.epoch.env.math": 0.9704258675078864, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8408373960424433, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8694852950563998, "success_rate.epoch.global": 0.9012083240098456, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991472712680578, "tokens_p.mean_in_band": 0.5528846153846154, "tokens_rate.above_band": 0.9795597484276729, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020440251572327043 }, { "epoch": 1.9546229228802727, "grad_norm": 121.59459115619276, "learning_rate": 3.6274332363115396e-07, "loss": 0.2806, "step": 9175, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, "success_rate.epoch.env.agentgym:sciworld": 0.9685314685314685, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9464720194647201, "success_rate.epoch.env.logic": 0.8991660348749052, "success_rate.epoch.env.math": 0.9704608113430484, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8409742120343839, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8695668769043091, "success_rate.epoch.global": 0.901318730442557, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0002700950734658, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.9991364421416234, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0008635578583765112 }, { "epoch": 1.9556881124840222, "grad_norm": 37.136455214458834, "learning_rate": 3.627092552934177e-07, "loss": 0.1691, "step": 9180, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8724137931034482, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9464720194647201, "success_rate.epoch.env.logic": 0.8992424242424243, "success_rate.epoch.env.math": 0.9705188679245284, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8410197651102835, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8693286766902474, "success_rate.epoch.global": 0.9013172583165885, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973030018761726, "tokens_p.mean_in_band": 0.7259114583333334, "tokens_rate.above_band": 0.9944029850746269, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005597014925373134 }, { "epoch": 1.9567533020877717, "grad_norm": 60.58211295407395, "learning_rate": 3.6267518498119573e-07, "loss": 0.3777, "step": 9185, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8724137931034482, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9464720194647201, "success_rate.epoch.env.logic": 0.8987915407854985, "success_rate.epoch.env.math": 0.9705535924617197, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8411107930146007, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8693089494604721, "success_rate.epoch.global": 0.9013157894736842, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978829160530192, "tokens_p.mean_in_band": 0.390625, "tokens_rate.above_band": 0.9713876967095851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02861230329041488 }, { "epoch": 1.9578184916915211, "grad_norm": 146.93966578858831, "learning_rate": 3.626411127193181e-07, "loss": 0.2278, "step": 9190, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9464720194647201, "success_rate.epoch.env.logic": 0.8989441930618401, "success_rate.epoch.env.math": 0.9705882352941176, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8412925364598227, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8693823566538803, "success_rate.epoch.global": 0.9014257072844731, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993528106508875, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9970501474926253, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0029498525073746312 }, { "epoch": 1.9588836812952706, "grad_norm": 85.65704990242106, "learning_rate": 3.626070385326165e-07, "loss": 0.2422, "step": 9195, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9464720194647201, "success_rate.epoch.env.logic": 0.8990963855421686, "success_rate.epoch.env.math": 0.9706227967097533, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8412335808109651, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8693939746763445, "success_rate.epoch.global": 0.9014241210502892, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942528735632183, "tokens_p.mean_in_band": 0.666015625, "tokens_rate.above_band": 0.9157894736842105, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08421052631578947 }, { "epoch": 1.95994887089902, "grad_norm": 93.35941200463283, "learning_rate": 3.62572962445924e-07, "loss": 0.3115, "step": 9200, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9464720194647201, "success_rate.epoch.env.logic": 0.8991723100075244, "success_rate.epoch.env.math": 0.9706572769953051, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8409350057012542, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8693768682800896, "success_rate.epoch.global": 0.9013114025338964, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9883177570093458, "tokens_p.mean_in_band": 0.5505756578947368, "tokens_rate.above_band": 0.8492063492063492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15079365079365079 }, { "epoch": 1.9610140605027695, "grad_norm": 744.4963052442677, "learning_rate": 3.625388844840749e-07, "loss": 0.2262, "step": 9205, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9464720194647201, "success_rate.epoch.env.logic": 0.8992481203007519, "success_rate.epoch.env.math": 0.9706802189210321, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8409220261809903, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8693944281393787, "success_rate.epoch.global": 0.901309946714032, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921370967741936, "tokens_p.mean_in_band": 0.5830078125, "tokens_rate.above_band": 0.9748427672955975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025157232704402517 }, { "epoch": 1.962079250106519, "grad_norm": 119.62004429252833, "learning_rate": 3.6250480467190505e-07, "loss": 0.257, "step": 9210, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9466019417475728, "success_rate.epoch.env.logic": 0.8993993993993994, "success_rate.epoch.env.math": 0.970714564623194, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8408186469584992, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.869413716126758, "success_rate.epoch.global": 0.9013084941228654, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982876712328768, "tokens_p.mean_in_band": 0.6011284722222222, "tokens_rate.above_band": 0.9700996677740864, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029900332225913623 }, { "epoch": 1.9631444397102684, "grad_norm": 245.02400293712606, "learning_rate": 3.624707230342516e-07, "loss": 0.248, "step": 9215, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9467312348668281, "success_rate.epoch.env.logic": 0.8994748687171793, "success_rate.epoch.env.math": 0.9707374170893485, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8408059023836549, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8694332497929712, "success_rate.epoch.global": 0.9013070447496677, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998584142394822, "tokens_p.mean_in_band": 0.7509765625, "tokens_rate.above_band": 0.987220447284345, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012779552715654952 }, { "epoch": 1.9642096293140179, "grad_norm": 211.04879472712864, "learning_rate": 3.6243663959595295e-07, "loss": 0.214, "step": 9220, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9467312348668281, "success_rate.epoch.env.logic": 0.8995502248875562, "success_rate.epoch.env.math": 0.9708057609964967, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8406577828182591, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8694328480213467, "success_rate.epoch.global": 0.9013055985837575, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948422330097088, "tokens_p.mean_in_band": 0.4211647727272727, "tokens_rate.above_band": 0.9035087719298246, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09649122807017543 }, { "epoch": 1.9652748189177673, "grad_norm": 118.7322997757117, "learning_rate": 3.6240255438184877e-07, "loss": 0.3001, "step": 9225, "success_rate.epoch.env.abd": 0.9875930521091811, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9468599033816425, "success_rate.epoch.env.logic": 0.8997005988023952, "success_rate.epoch.env.math": 0.9708171206225681, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8408833522083805, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8694825602486468, "success_rate.epoch.global": 0.9014146772767463, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969929963459196, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.992744860943168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007255139056831923 }, { "epoch": 1.9663400085215168, "grad_norm": 103.92445388398006, "learning_rate": 3.623684674167803e-07, "loss": 0.1225, "step": 9230, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9468599033816425, "success_rate.epoch.env.logic": 0.8997756170531039, "success_rate.epoch.env.math": 0.9708511465215701, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8407805429864253, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8694956141488182, "success_rate.epoch.global": 0.9014131154780305, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938979289940828, "tokens_p.mean_below_band": 1.6555645743210334e-12, "tokens_p.mean_in_band": 0.8265625, "tokens_rate.above_band": 0.9657142857142857, "tokens_rate.below_band": 0.005714285714285714, "tokens_rate.in_band": 0.02857142857142857 }, { "epoch": 1.9674051981252663, "grad_norm": 146.4281118127438, "learning_rate": 3.6233437872558985e-07, "loss": 0.2767, "step": 9235, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9468599033816425, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9708850931677019, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8406779661016949, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8695097734859363, "success_rate.epoch.global": 0.9014115571239524, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978530534351145, "tokens_p.mean_in_band": 0.5546875, "tokens_rate.above_band": 0.916083916083916, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08391608391608392 }, { "epoch": 1.9684703877290157, "grad_norm": 118.09468061866858, "learning_rate": 3.623002883331209e-07, "loss": 0.1381, "step": 9240, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9692832764505119, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9468599033816425, "success_rate.epoch.env.logic": 0.9000745712155108, "success_rate.epoch.env.math": 0.9709414955443626, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8407679277244494, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8695490504602074, "success_rate.epoch.global": 0.9015201586252478, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.000330250990753, "tokens_p.mean_in_band": 0.8229166666666666, "tokens_rate.above_band": 0.9921363040629095, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007863695937090432 }, { "epoch": 1.9695355773327652, "grad_norm": 173.38182405899335, "learning_rate": 3.6226619626421837e-07, "loss": 0.2257, "step": 9245, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.8737201365187713, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9468599033816425, "success_rate.epoch.env.logic": 0.8994787788533135, "success_rate.epoch.env.math": 0.9709752321981424, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8408128704487722, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8693265514501611, "success_rate.epoch.global": 0.9014084507042254, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9996476582827407, "tokens_p.mean_in_band": 0.488031914893617, "tokens_rate.above_band": 0.9608333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03916666666666667 }, { "epoch": 1.9706007669365146, "grad_norm": 133.41869435186206, "learning_rate": 3.622321025437282e-07, "loss": 0.358, "step": 9250, "success_rate.epoch.env.abd": 0.9876543209876543, "success_rate.epoch.env.agentgym:alfworld": 0.8741496598639455, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.946987951807229, "success_rate.epoch.env.logic": 0.8994787788533135, "success_rate.epoch.env.math": 0.9710200927357032, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8409475465313029, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8693963393594584, "success_rate.epoch.global": 0.9015168168828314, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968549250535332, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9915074309978769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008492569002123142 }, { "epoch": 1.971665956540264, "grad_norm": 162.71428128857067, "learning_rate": 3.6219800719649785e-07, "loss": 0.3732, "step": 9255, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.946987951807229, "success_rate.epoch.env.logic": 0.8995535714285714, "success_rate.epoch.env.math": 0.970679012345679, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8410819949281487, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8691177347660478, "success_rate.epoch.global": 0.9014053579270971, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99748322147651, "tokens_p.mean_in_band": 0.63330078125, "tokens_rate.above_band": 0.9738562091503268, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026143790849673203 }, { "epoch": 1.9727311461440138, "grad_norm": 141.1740562317876, "learning_rate": 3.6216391024737555e-07, "loss": 0.2552, "step": 9260, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.946987951807229, "success_rate.epoch.env.logic": 0.899702823179792, "success_rate.epoch.env.math": 0.9706903200925568, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8410689170182841, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.869140575848768, "success_rate.epoch.global": 0.9014038166264532, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990808823529411, "tokens_p.mean_in_band": 0.583984375, "tokens_rate.above_band": 0.9883720930232558, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011627906976744186 }, { "epoch": 1.973796335747763, "grad_norm": 230.68805083661985, "learning_rate": 3.621298117212111e-07, "loss": 0.2407, "step": 9265, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.946987951807229, "success_rate.epoch.env.logic": 0.8991097922848664, "success_rate.epoch.env.math": 0.9707354639969196, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8406408094435076, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.869061218994725, "success_rate.epoch.global": 0.9011831726555652, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9962337133550488, "tokens_p.mean_below_band": 7.729977369308472e-08, "tokens_p.mean_in_band": 0.6416015625, "tokens_rate.above_band": 0.9489953632148377, "tokens_rate.below_band": 0.0015455950540958269, "tokens_rate.in_band": 0.04945904173106646 }, { "epoch": 1.9748615253515127, "grad_norm": 80.44849496360418, "learning_rate": 3.620957116428551e-07, "loss": 0.1594, "step": 9270, "success_rate.epoch.env.abd": 0.9877149877149877, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9471153846153846, "success_rate.epoch.env.logic": 0.899184581171238, "success_rate.epoch.env.math": 0.9707692307692307, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8408197641774284, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8691016918903347, "success_rate.epoch.global": 0.9012913110089735, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985518292682927, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9975669099756691, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0024330900243309003 }, { "epoch": 1.975926714955262, "grad_norm": 97.39905145173371, "learning_rate": 3.6206161003715956e-07, "loss": 0.1979, "step": 9275, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.947242206235012, "success_rate.epoch.env.logic": 0.8994082840236687, "success_rate.epoch.env.math": 0.9708029197080292, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8406285072951739, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8691219706951375, "success_rate.epoch.global": 0.9012898994315698, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967364091559371, "tokens_p.mean_in_band": 0.6460336538461539, "tokens_rate.above_band": 0.9817415730337079, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018258426966292134 }, { "epoch": 1.9769919045590116, "grad_norm": 77.5166022700115, "learning_rate": 3.620275069289775e-07, "loss": 0.322, "step": 9280, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.947242206235012, "success_rate.epoch.env.logic": 0.8995568685376661, "success_rate.epoch.env.math": 0.9708141321044547, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8405715886803026, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.869206535795315, "success_rate.epoch.global": 0.9012884909368858, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964583333333333, "tokens_p.mean_in_band": 0.5484375, "tokens_rate.above_band": 0.9836065573770492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01639344262295082 }, { "epoch": 1.9780570941627609, "grad_norm": 125.99951134193954, "learning_rate": 3.619934023431629e-07, "loss": 0.5176, "step": 9285, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9449760765550239, "success_rate.epoch.env.logic": 0.8995568685376661, "success_rate.epoch.env.math": 0.9708365310821182, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8403243847874721, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8689800871957555, "success_rate.epoch.global": 0.9010689354275742, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9954551575545381, "tokens_p.mean_below_band": 8.307397365570068e-07, "tokens_p.mean_in_band": 0.47935267857142855, "tokens_rate.above_band": 0.9530287474332649, "tokens_rate.below_band": 0.0002566735112936345, "tokens_rate.in_band": 0.04671457905544148 }, { "epoch": 1.9791222837665106, "grad_norm": 92.51467006729722, "learning_rate": 3.6195929630457095e-07, "loss": 0.2159, "step": 9290, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9449760765550239, "success_rate.epoch.env.logic": 0.8998527245949927, "success_rate.epoch.env.math": 0.9708588957055214, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8399441340782123, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8689744481022527, "success_rate.epoch.global": 0.9009588145565482, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9923611111111111, "tokens_p.mean_in_band": 0.6057692307692307, "tokens_rate.above_band": 0.8737864077669902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1262135922330097 }, { "epoch": 1.9801874733702598, "grad_norm": 108.66737695307832, "learning_rate": 3.619251888380579e-07, "loss": 0.2765, "step": 9295, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9451073985680191, "success_rate.epoch.env.logic": 0.8998527245949927, "success_rate.epoch.env.math": 0.9709146574818217, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8400335008375209, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8690779072298429, "success_rate.epoch.global": 0.901066608619939, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985850192678227, "tokens_p.mean_in_band": 0.6751302083333334, "tokens_rate.above_band": 0.9971181556195965, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002881844380403458 }, { "epoch": 1.9812526629740095, "grad_norm": 146.58759284466217, "learning_rate": 3.618910799684812e-07, "loss": 0.2646, "step": 9300, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9451073985680191, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9709369024856597, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8400222965440357, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8690922996039666, "success_rate.epoch.global": 0.9010654490106544, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9901315789473685, "tokens_p.mean_in_band": 0.7326388888888888, "tokens_rate.above_band": 0.9134615384615384, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08653846153846154 }, { "epoch": 1.9823178525777587, "grad_norm": 31.659030602726542, "learning_rate": 3.6185696972069894e-07, "loss": 0.1954, "step": 9305, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9591836734693877, "success_rate.epoch.env.ded": 0.9451073985680191, "success_rate.epoch.env.logic": 0.9002201027146002, "success_rate.epoch.env.math": 0.9709923664122138, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.840066870994706, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8691307101670755, "success_rate.epoch.global": 0.9011728931364031, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969284188034188, "tokens_p.mean_in_band": 0.712890625, "tokens_rate.above_band": 0.9669421487603306, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03305785123966942 }, { "epoch": 1.9833830421815084, "grad_norm": 131.04405700264704, "learning_rate": 3.618228581195705e-07, "loss": 0.4417, "step": 9310, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9697986577181208, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9451073985680191, "success_rate.epoch.env.logic": 0.9002201027146002, "success_rate.epoch.env.math": 0.9710365853658537, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8399666017255775, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8692090705521914, "success_rate.epoch.global": 0.9011716207420265, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945652173913043, "tokens_p.mean_in_band": 0.4147135416666667, "tokens_rate.above_band": 0.968421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031578947368421054 }, { "epoch": 1.9844482317852576, "grad_norm": 137.595617783164, "learning_rate": 3.617887451899561e-07, "loss": 0.2663, "step": 9315, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9697986577181208, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9452380952380952, "success_rate.epoch.env.logic": 0.9002201027146002, "success_rate.epoch.env.math": 0.9711026615969581, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8398220244716351, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8692138156110314, "success_rate.epoch.global": 0.9011703511053316, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968398876404494, "tokens_p.mean_in_band": 0.45, "tokens_rate.above_band": 0.9726775956284153, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0273224043715847 }, { "epoch": 1.9855134213890073, "grad_norm": 88.22679644944432, "learning_rate": 3.617546309567172e-07, "loss": 0.351, "step": 9320, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9697986577181208, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9452380952380952, "success_rate.epoch.env.logic": 0.9004392386530015, "success_rate.epoch.env.math": 0.9711355867831372, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8396776882467352, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8692263327005914, "success_rate.epoch.global": 0.9011690842173631, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.991504854368932, "tokens_p.mean_in_band": 0.5872395833333334, "tokens_rate.above_band": 0.9196428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08035714285714286 }, { "epoch": 1.9865786109927566, "grad_norm": 457.20328575149074, "learning_rate": 3.6172051544471575e-07, "loss": 0.241, "step": 9325, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9452380952380952, "success_rate.epoch.env.logic": 0.9005120702267739, "success_rate.epoch.env.math": 0.9711793704967766, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8398112159911161, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.869267376820016, "success_rate.epoch.global": 0.9012759515570934, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976801310043668, "tokens_p.mean_in_band": 0.408203125, "tokens_rate.above_band": 0.9956521739130435, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004347826086956522 }, { "epoch": 1.9876438005965062, "grad_norm": 456.1231754450967, "learning_rate": 3.616863986788151e-07, "loss": 0.4675, "step": 9330, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9452380952380952, "success_rate.epoch.env.logic": 0.9006574141709277, "success_rate.epoch.env.math": 0.9712121212121212, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.839478791239257, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8692533468116196, "success_rate.epoch.global": 0.9011665586519767, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941666666666666, "tokens_p.mean_in_band": 0.51241455078125, "tokens_rate.above_band": 0.9375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0625 }, { "epoch": 1.9887089902002555, "grad_norm": 24.228203276889616, "learning_rate": 3.616522806838791e-07, "loss": 0.2454, "step": 9335, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9453681710213777, "success_rate.epoch.env.logic": 0.9007299270072993, "success_rate.epoch.env.math": 0.9712230215827338, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8396566048186098, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.8693349705547356, "success_rate.epoch.global": 0.9012731981009927, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977409638554217, "tokens_p.mean_in_band": 0.6822916666666666, "tokens_rate.above_band": 0.9940119760479041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005988023952095809 }, { "epoch": 1.9897741798040052, "grad_norm": 63.15318199117054, "learning_rate": 3.6161816148477287e-07, "loss": 0.1866, "step": 9340, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9457547169811321, "success_rate.epoch.env.logic": 0.9008023340627279, "success_rate.epoch.env.math": 0.9712447975785092, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8398340248962656, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.8693948022900643, "success_rate.epoch.global": 0.9013796076740677, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999234068627451, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.9908393694077546, "grad_norm": 90.98323883013198, "learning_rate": 3.6158404110636217e-07, "loss": 0.3974, "step": 9345, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9457547169811321, "success_rate.epoch.env.logic": 0.9008746355685131, "success_rate.epoch.env.math": 0.9709104646769928, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8400110527770103, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.8693960753998474, "success_rate.epoch.global": 0.9013781223083549, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966793168880456, "tokens_p.mean_in_band": 0.71796875, "tokens_rate.above_band": 0.9906015037593985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009398496240601503 }, { "epoch": 1.991904559011504, "grad_norm": 89.82657481925854, "learning_rate": 3.615499195735137e-07, "loss": 0.213, "step": 9350, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9607843137254902, "success_rate.epoch.env.ded": 0.9458823529411765, "success_rate.epoch.env.logic": 0.9009468317552805, "success_rate.epoch.env.math": 0.970954356846473, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.84009942004971, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8695305526741156, "success_rate.epoch.global": 0.901484190148419, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998070987654321, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.9929697486152536, "grad_norm": 55.59520445288831, "learning_rate": 3.6151579691109497e-07, "loss": 0.4466, "step": 9355, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9607843137254902, "success_rate.epoch.env.ded": 0.9458823529411765, "success_rate.epoch.env.logic": 0.9012345679012346, "success_rate.epoch.env.math": 0.970954356846473, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8403639371381307, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8695807575136043, "success_rate.epoch.global": 0.9015900300816502, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99390625, "tokens_p.mean_in_band": 0.7368706597222222, "tokens_rate.above_band": 0.9174311926605505, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08256880733944955 }, { "epoch": 1.994034938219003, "grad_norm": 46.508304769784004, "learning_rate": 3.6148167314397433e-07, "loss": 0.2788, "step": 9360, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9615384615384616, "success_rate.epoch.env.ded": 0.9461358313817331, "success_rate.epoch.env.logic": 0.9013062409288825, "success_rate.epoch.env.math": 0.9709871891484552, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8404958677685951, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.869693854078479, "success_rate.epoch.global": 0.9016956428418116, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978419654714475, "tokens_p.mean_in_band": 0.51171875, "tokens_rate.above_band": 0.9973509933774835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0026490066225165563 }, { "epoch": 1.9951001278227525, "grad_norm": 174.8542830108639, "learning_rate": 3.61447548297021e-07, "loss": 0.3549, "step": 9365, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9622641509433962, "success_rate.epoch.env.ded": 0.9461358313817331, "success_rate.epoch.env.logic": 0.9013778100072516, "success_rate.epoch.env.math": 0.9710199473089951, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8404401650618982, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8697642462545833, "success_rate.epoch.global": 0.9016938250428816, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9914772727272727, "tokens_p.mean_in_band": 0.49296875, "tokens_rate.above_band": 0.8897058823529411, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11029411764705882 }, { "epoch": 1.996165317426502, "grad_norm": 70.97149395509877, "learning_rate": 3.6141342239510485e-07, "loss": 0.3877, "step": 9370, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9622641509433962, "success_rate.epoch.env.ded": 0.9461358313817331, "success_rate.epoch.env.logic": 0.9014492753623189, "success_rate.epoch.env.math": 0.9706987227648385, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8402529557327467, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8697299036275724, "success_rate.epoch.global": 0.9015849218248019, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965, "tokens_p.mean_in_band": 0.3030790441176471, "tokens_rate.above_band": 0.8802816901408451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11971830985915492 }, { "epoch": 1.9972305070302514, "grad_norm": 115.82174745572928, "learning_rate": 3.6137929546309664e-07, "loss": 0.4441, "step": 9375, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9622641509433962, "success_rate.epoch.env.ded": 0.9461358313817331, "success_rate.epoch.env.logic": 0.9007965242577842, "success_rate.epoch.env.math": 0.9707536557930259, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8403846153846154, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8696964668804636, "success_rate.epoch.global": 0.9015832263585793, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969490521327015, "tokens_p.mean_in_band": 0.41342905405405406, "tokens_rate.above_band": 0.9661172161172161, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03388278388278388 }, { "epoch": 1.9982956966340009, "grad_norm": 556.2684394186227, "learning_rate": 3.613451675258678e-07, "loss": 0.5335, "step": 9380, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.8733333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9622641509433962, "success_rate.epoch.env.ded": 0.9461358313817331, "success_rate.epoch.env.logic": 0.9007965242577842, "success_rate.epoch.env.math": 0.9707865168539326, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8403292181069959, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8697717010295647, "success_rate.epoch.global": 0.9015815345159222, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985166139240507, "tokens_p.mean_in_band": 0.6189903846153846, "tokens_rate.above_band": 0.9798449612403101, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020155038759689922 }, { "epoch": 1.9993608862377503, "grad_norm": 234.36914750080672, "learning_rate": 3.613110386082904e-07, "loss": 0.4179, "step": 9385, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.8733333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9838709677419355, "success_rate.epoch.env.babyai": 0.9622641509433962, "success_rate.epoch.env.ded": 0.9462616822429907, "success_rate.epoch.env.logic": 0.9008683068017366, "success_rate.epoch.env.math": 0.9708083832335329, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.84, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8697857639425375, "success_rate.epoch.global": 0.9014730999146029, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946996466431095, "tokens_p.mean_in_band": 0.5830078125, "tokens_rate.above_band": 0.9593220338983051, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04067796610169491 }, { "epoch": 2.0004260758415, "grad_norm": 320.926825579749, "learning_rate": 3.612769087352377e-07, "loss": 0.4256, "step": 9390, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999234693877551, "tokens_p.mean_in_band": 0.259765625, "tokens_rate.above_band": 0.9959349593495935, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0040650406504065045 }, { "epoch": 2.0014912654452495, "grad_norm": 340.63416924544634, "learning_rate": 3.6124277793158297e-07, "loss": 0.5056, "step": 9395, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.6666666666666666, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.science": 0.5, "success_rate.epoch.env_macro_mean": 0.8333333333333333, "success_rate.epoch.global": 0.8, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9990040307461567, "tokens_p.mean_in_band": 0.60986328125, "tokens_rate.above_band": 0.9881437569470174, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011856243052982586 }, { "epoch": 2.0025564550489987, "grad_norm": 243.60823813088356, "learning_rate": 3.612086462222006e-07, "loss": 0.6777, "step": 9400, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.6666666666666666, "success_rate.epoch.env.math": 0.8, "success_rate.epoch.env.science": 0.4, "success_rate.epoch.env_macro_mean": 0.811111111111111, "success_rate.epoch.global": 0.75, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7708333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9935233160621761, "tokens_p.mean_below_band": 1.8189894035458565e-09, "tokens_p.mean_in_band": 0.6473214285714286, "tokens_rate.above_band": 0.9633943427620633, "tokens_rate.below_band": 0.0016638935108153079, "tokens_rate.in_band": 0.03494176372712146 }, { "epoch": 2.0036216446527484, "grad_norm": 212.36509304422106, "learning_rate": 3.611745136319656e-07, "loss": 0.6162, "step": 9405, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8, "success_rate.epoch.env.science": 0.375, "success_rate.epoch.env_macro_mean": 0.7791666666666667, "success_rate.epoch.global": 0.7, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9939516129032258, "tokens_p.mean_in_band": 0.5398763020833334, "tokens_rate.above_band": 0.8857142857142857, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11428571428571428 }, { "epoch": 2.0046868342564976, "grad_norm": 344.2226449012985, "learning_rate": 3.611403801857535e-07, "loss": 0.8166, "step": 9410, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8461538461538461, "success_rate.epoch.env.science": 0.46153846153846156, "success_rate.epoch.env_macro_mean": 0.8012820512820514, "success_rate.epoch.global": 0.725, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986702127659575, "tokens_p.mean_in_band": 0.59619140625, "tokens_rate.above_band": 0.9778085991678225, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022191400832177532 }, { "epoch": 2.0057520238602473, "grad_norm": 137.40345291730455, "learning_rate": 3.6110624590844047e-07, "loss": 0.6818, "step": 9415, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.4, "success_rate.epoch.env.math": 0.7857142857142857, "success_rate.epoch.env.science": 0.5263157894736842, "success_rate.epoch.env_macro_mean": 0.8390037593984963, "success_rate.epoch.global": 0.7, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9978321771611526, "tokens_p.mean_in_band": 0.5275135869565217, "tokens_rate.above_band": 0.953204476093591, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04679552390640895 }, { "epoch": 2.0068172134639966, "grad_norm": 249.94363670988278, "learning_rate": 3.6107211082490344e-07, "loss": 0.5498, "step": 9420, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.4, "success_rate.epoch.env.math": 0.8125, "success_rate.epoch.env.science": 0.56, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636111111111111, "success_rate.epoch.global": 0.7166666666666667, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983766233766234, "tokens_p.mean_in_band": 0.3703125, "tokens_rate.above_band": 0.9685534591194969, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031446540880503145 }, { "epoch": 2.0078824030677462, "grad_norm": 131.17384422636022, "learning_rate": 3.610379749600197e-07, "loss": 0.41, "step": 9425, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.85, "success_rate.epoch.env.science": 0.6206896551724138, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8671136653895274, "success_rate.epoch.global": 0.7428571428571429, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974681712962963, "tokens_p.mean_in_band": 0.7034696691176471, "tokens_rate.above_band": 0.927038626609442, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07296137339055794 }, { "epoch": 2.0089475926714955, "grad_norm": 205.8439565836018, "learning_rate": 3.610038383386673e-07, "loss": 0.6034, "step": 9430, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.8, "success_rate.epoch.env.science": 0.5454545454545454, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8531986531986532, "success_rate.epoch.global": 0.7, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.6, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9955255681818181, "tokens_p.mean_in_band": 0.4512939453125, "tokens_rate.above_band": 0.953757225433526, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046242774566473986 }, { "epoch": 2.010012782275245, "grad_norm": 107.20687856202514, "learning_rate": 3.609697009857247e-07, "loss": 0.4466, "step": 9435, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.8275862068965517, "success_rate.epoch.env.science": 0.5945945945945946, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8617237927582756, "success_rate.epoch.global": 0.7333333333333333, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956617647058823, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.9883720930232558, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011627906976744186 }, { "epoch": 2.0110779718789944, "grad_norm": 142.3629123005929, "learning_rate": 3.60935562926071e-07, "loss": 0.5068, "step": 9440, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.42857142857142855, "success_rate.epoch.env.math": 0.8484848484848485, "success_rate.epoch.env.science": 0.5789473684210527, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8856003645477329, "success_rate.epoch.global": 0.75, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997650375939849, "tokens_p.mean_in_band": 0.5494791666666666, "tokens_rate.above_band": 0.9866468842729971, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013353115727002967 }, { "epoch": 2.012143161482744, "grad_norm": 62.622488581704104, "learning_rate": 3.609014241845858e-07, "loss": 0.4964, "step": 9445, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.4444444444444444, "success_rate.epoch.env.math": 0.8648648648648649, "success_rate.epoch.env.science": 0.5952380952380952, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8904547404547405, "success_rate.epoch.global": 0.7545454545454545, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952713815789473, "tokens_p.mean_in_band": 0.548046875, "tokens_rate.above_band": 0.9529780564263323, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047021943573667714 }, { "epoch": 2.0132083510864933, "grad_norm": 126.28369202278844, "learning_rate": 3.6086728478614904e-07, "loss": 0.6079, "step": 9450, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9090909090909091, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.8717948717948718, "success_rate.epoch.env.science": 0.5869565217391305, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8867842302624911, "success_rate.epoch.global": 0.75, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9984358706986444, "tokens_p.mean_in_band": 0.33774038461538464, "tokens_rate.above_band": 0.9866255144032922, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013374485596707819 }, { "epoch": 2.014273540690243, "grad_norm": 231.68794549351844, "learning_rate": 3.6083314475564143e-07, "loss": 0.495, "step": 9455, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9166666666666666, "success_rate.epoch.env.logic": 0.45454545454545453, "success_rate.epoch.env.math": 0.8780487804878049, "success_rate.epoch.env.science": 0.5882352941176471, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8837496195817574, "success_rate.epoch.global": 0.7461538461538462, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9988398644667059, "tokens_p.mean_in_band": 0.4778293918918919, "tokens_rate.above_band": 0.9786620530565168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021337946943483274 }, { "epoch": 2.0153387302939922, "grad_norm": 81.89482890135903, "learning_rate": 3.6079900411794387e-07, "loss": 0.4829, "step": 9460, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.46153846153846156, "success_rate.epoch.env.math": 0.8863636363636364, "success_rate.epoch.env.science": 0.6037735849056604, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8880247111379187, "success_rate.epoch.global": 0.7571428571428571, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988046448087432, "tokens_p.mean_in_band": 0.4322916666666667, "tokens_rate.above_band": 0.9838709677419355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016129032258064516 }, { "epoch": 2.016403919897742, "grad_norm": 191.7094748697114, "learning_rate": 3.607648628979379e-07, "loss": 0.4237, "step": 9465, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5333333333333333, "success_rate.epoch.env.math": 0.8958333333333334, "success_rate.epoch.env.science": 0.6071428571428571, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8964880952380951, "success_rate.epoch.global": 0.7666666666666667, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959239130434783, "tokens_p.mean_in_band": 0.5600328947368421, "tokens_rate.above_band": 0.8789808917197452, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12101910828025478 }, { "epoch": 2.017469109501491, "grad_norm": 176.1966454416567, "learning_rate": 3.607307211205053e-07, "loss": 0.5309, "step": 9470, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 0.9056603773584906, "success_rate.epoch.env.science": 0.6101694915254238, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8944401297455343, "success_rate.epoch.global": 0.76875, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0006017329910142, "tokens_p.mean_in_band": 0.4693287037037037, "tokens_rate.above_band": 0.966501240694789, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033498759305210915 }, { "epoch": 2.018534299105241, "grad_norm": 239.3430133453057, "learning_rate": 3.6069657881052844e-07, "loss": 0.2041, "step": 9475, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9122807017543859, "success_rate.epoch.env.science": 0.6129032258064516, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9009310911687823, "success_rate.epoch.global": 0.7764705882352941, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963803088803089, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.9847908745247148, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015209125475285171 }, { "epoch": 2.01959948870899, "grad_norm": 172.39458477329816, "learning_rate": 3.606624359928899e-07, "loss": 0.361, "step": 9480, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6190476190476191, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.science": 0.625, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9089285714285713, "success_rate.epoch.global": 0.7888888888888889, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991159830268741, "tokens_p.mean_in_band": 0.67578125, "tokens_rate.above_band": 0.997179125528914, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0028208744710860366 }, { "epoch": 2.0206646783127398, "grad_norm": 90.98167587573818, "learning_rate": 3.606282926924728e-07, "loss": 0.2571, "step": 9485, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6363636363636364, "success_rate.epoch.env.math": 0.9206349206349206, "success_rate.epoch.env.science": 0.6417910447761194, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9127361030346105, "success_rate.epoch.global": 0.8, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998546511627907, "tokens_p.mean_in_band": 0.7591145833333334, "tokens_rate.above_band": 0.9862385321100917, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013761467889908258 }, { "epoch": 2.021729867916489, "grad_norm": 115.3975355084232, "learning_rate": 3.6059414893416054e-07, "loss": 0.4624, "step": 9490, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6521739130434783, "success_rate.epoch.env.math": 0.9242424242424242, "success_rate.epoch.env.science": 0.625, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9129987765857331, "success_rate.epoch.global": 0.795, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9957545518207283, "tokens_p.mean_in_band": 0.4736328125, "tokens_rate.above_band": 0.9571045576407506, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04289544235924933 }, { "epoch": 2.0227950575202387, "grad_norm": 722.5825883483019, "learning_rate": 3.605600047428368e-07, "loss": 0.4416, "step": 9495, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.6923076923076923, "success_rate.epoch.env.math": 0.9154929577464789, "success_rate.epoch.env.science": 0.6351351351351351, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9171507213760736, "success_rate.epoch.global": 0.8, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940878378378378, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.961038961038961, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03896103896103896 }, { "epoch": 2.023860247123988, "grad_norm": 89.78032517627625, "learning_rate": 3.6052586014338556e-07, "loss": 0.3562, "step": 9500, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.7037037037037037, "success_rate.epoch.env.math": 0.9210526315789473, "success_rate.epoch.env.science": 0.6363636363636364, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9189691400217717, "success_rate.epoch.global": 0.8045454545454546, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9922680412371134, "tokens_p.mean_in_band": 0.6393229166666666, "tokens_rate.above_band": 0.941747572815534, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05825242718446602 }, { "epoch": 2.0249254367277376, "grad_norm": 96.66100381812466, "learning_rate": 3.6049171516069125e-07, "loss": 0.2178, "step": 9505, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9285714285714286, "success_rate.epoch.env.logic": 0.7037037037037037, "success_rate.epoch.env.math": 0.9240506329113924, "success_rate.epoch.env.science": 0.6419753086419753, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.91983010738285, "success_rate.epoch.global": 0.8078602620087336, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9969978165938864, "tokens_p.mean_in_band": 0.6637834821428571, "tokens_rate.above_band": 0.9423868312757202, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05761316872427984 }, { "epoch": 2.025990626331487, "grad_norm": 826.1814779725264, "learning_rate": 3.604575698196385e-07, "loss": 0.4907, "step": 9510, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.7037037037037037, "success_rate.epoch.env.math": 0.926829268292683, "success_rate.epoch.env.science": 0.6309523809523809, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9198985352948768, "success_rate.epoch.global": 0.8075313807531381, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980357142857142, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.9641873278236914, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03581267217630854 }, { "epoch": 2.0270558159352365, "grad_norm": 140.5626460194624, "learning_rate": 3.604234241451121e-07, "loss": 0.112, "step": 9515, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.7333333333333333, "success_rate.epoch.env.math": 0.9285714285714286, "success_rate.epoch.env.science": 0.6395348837209303, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9238939645625692, "success_rate.epoch.global": 0.8152610441767069, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965533088235294, "tokens_p.mean_in_band": 0.8411458333333334, "tokens_rate.above_band": 0.9945155393053017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005484460694698354 }, { "epoch": 2.0281210055389858, "grad_norm": 139.14264902908886, "learning_rate": 3.6038927816199726e-07, "loss": 0.4959, "step": 9520, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.7333333333333333, "success_rate.epoch.env.math": 0.9294117647058824, "success_rate.epoch.env.science": 0.6483516483516484, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9255541190835309, "success_rate.epoch.global": 0.8185328185328186, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994817073170732, "tokens_p.mean_in_band": 0.67431640625, "tokens_rate.above_band": 0.9961127308066083, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003887269193391642 }, { "epoch": 2.0291861951427355, "grad_norm": 189.81289108124403, "learning_rate": 3.6035513189517925e-07, "loss": 0.3229, "step": 9525, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7419354838709677, "success_rate.epoch.env.math": 0.9325842696629213, "success_rate.epoch.env.science": 0.6526315789473685, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.927451975353389, "success_rate.epoch.global": 0.8215613382899628, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977313603662524, "tokens_p.mean_in_band": 0.60546875, "tokens_rate.above_band": 0.9960912052117263, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003908794788273616 }, { "epoch": 2.0302513847464847, "grad_norm": 101.57335611204468, "learning_rate": 3.6032098536954376e-07, "loss": 0.3196, "step": 9530, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7575757575757576, "success_rate.epoch.env.math": 0.9340659340659341, "success_rate.epoch.env.science": 0.6435643564356436, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9282574469129967, "success_rate.epoch.global": 0.8172043010752689, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9915602189781022, "tokens_p.mean_in_band": 0.5738636363636364, "tokens_rate.above_band": 0.8616352201257862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13836477987421383 }, { "epoch": 2.0313165743502344, "grad_norm": 110.82828022333625, "learning_rate": 3.6028683860997635e-07, "loss": 0.3032, "step": 9535, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7647058823529411, "success_rate.epoch.env.math": 0.9368421052631579, "success_rate.epoch.env.science": 0.6509433962264151, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9299859804895145, "success_rate.epoch.global": 0.8200692041522492, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938186813186813, "tokens_p.mean_in_band": 0.6361607142857143, "tokens_rate.above_band": 0.9285714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07142857142857142 }, { "epoch": 2.0323817639539836, "grad_norm": 83.98913706399934, "learning_rate": 3.6025269164136306e-07, "loss": 0.23, "step": 9540, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7777777777777778, "success_rate.epoch.env.math": 0.9381443298969072, "success_rate.epoch.env.science": 0.6422018348623854, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9305492363589701, "success_rate.epoch.global": 0.8193979933110368, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954044117647058, "tokens_p.mean_in_band": 0.4375, "tokens_rate.above_band": 0.9272727272727272, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07272727272727272 }, { "epoch": 2.0334469535577333, "grad_norm": 90.99730528111844, "learning_rate": 3.6021854448858993e-07, "loss": 0.3659, "step": 9545, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.7837837837837838, "success_rate.epoch.env.math": 0.94, "success_rate.epoch.env.science": 0.6403508771929824, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9311503082029396, "success_rate.epoch.global": 0.8187702265372169, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994740099009901, "tokens_p.mean_in_band": 0.5028645833333333, "tokens_rate.above_band": 0.8706896551724138, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12931034482758622 }, { "epoch": 2.0345121431614825, "grad_norm": 57.27481021638967, "learning_rate": 3.601843971765431e-07, "loss": 0.3535, "step": 9550, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.7948717948717948, "success_rate.epoch.env.math": 0.9411764705882353, "success_rate.epoch.env.science": 0.652542372881356, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9338590638341386, "success_rate.epoch.global": 0.8244514106583072, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991490166414524, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.0355773327652322, "grad_norm": 41.035762347023855, "learning_rate": 3.6015024973010895e-07, "loss": 0.2693, "step": 9555, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8048780487804879, "success_rate.epoch.env.math": 0.9439252336448598, "success_rate.epoch.env.science": 0.6583333333333333, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.935713661575868, "success_rate.epoch.global": 0.8297872340425532, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948275862068966, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9731543624161074, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026845637583892617 }, { "epoch": 2.0366425223689815, "grad_norm": 413.41384942417, "learning_rate": 3.601161021741739e-07, "loss": 0.3851, "step": 9560, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.813953488372093, "success_rate.epoch.env.math": 0.9363636363636364, "success_rate.epoch.env.science": 0.672, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.937231712473573, "success_rate.epoch.global": 0.831858407079646, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992597292724196, "tokens_p.mean_in_band": 0.6588541666666666, "tokens_rate.above_band": 0.9800995024875622, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01990049751243781 }, { "epoch": 2.037707711972731, "grad_norm": 25.091192591618025, "learning_rate": 3.600819545336244e-07, "loss": 0.2506, "step": 9565, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.813953488372093, "success_rate.epoch.env.math": 0.9380530973451328, "success_rate.epoch.env.science": 0.6796875, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9384075038098179, "success_rate.epoch.global": 0.836676217765043, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986092931937173, "tokens_p.mean_in_band": 0.814453125, "tokens_rate.above_band": 0.9982578397212544, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0017421602787456446 }, { "epoch": 2.038772901576481, "grad_norm": 124.26799191600831, "learning_rate": 3.6004780683334705e-07, "loss": 0.3578, "step": 9570, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8, "success_rate.epoch.env.math": 0.9391304347826087, "success_rate.epoch.env.science": 0.6742424242424242, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9367918313570488, "success_rate.epoch.global": 0.8328690807799443, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9966267820773931, "tokens_p.mean_in_band": 0.6027901785714286, "tokens_rate.above_band": 0.9655850540806293, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0344149459193707 }, { "epoch": 2.03983809118023, "grad_norm": 74.61564211759006, "learning_rate": 3.600136590982284e-07, "loss": 0.3258, "step": 9575, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8163265306122449, "success_rate.epoch.env.math": 0.9401709401709402, "success_rate.epoch.env.science": 0.6814814814814815, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9392524406810121, "success_rate.epoch.global": 0.8373983739837398, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983606557377049, "tokens_p.mean_in_band": 0.86865234375, "tokens_rate.above_band": 0.9744408945686901, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025559105431309903 }, { "epoch": 2.0409032807839798, "grad_norm": 102.4449027559801, "learning_rate": 3.599795113531551e-07, "loss": 0.4033, "step": 9580, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8163265306122449, "success_rate.epoch.env.math": 0.9411764705882353, "success_rate.epoch.env.science": 0.6830985915492958, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.939514704729523, "success_rate.epoch.global": 0.8364116094986808, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945436507936508, "tokens_p.mean_in_band": 0.5379971590909091, "tokens_rate.above_band": 0.9197080291970803, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08029197080291971 }, { "epoch": 2.041968470387729, "grad_norm": 137.5972515293955, "learning_rate": 3.5994536362301375e-07, "loss": 0.3628, "step": 9585, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.82, "success_rate.epoch.env.math": 0.9426229508196722, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6712328767123288, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.944400116552496, "success_rate.epoch.global": 0.8329048843187661, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9957191780821918, "tokens_p.mean_in_band": 0.51611328125, "tokens_rate.above_band": 0.9319148936170213, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06808510638297872 }, { "epoch": 2.0430336599914787, "grad_norm": 48.459671839727775, "learning_rate": 3.5991121593269107e-07, "loss": 0.2521, "step": 9590, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8235294117647058, "success_rate.epoch.env.math": 0.944, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6754966887417219, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9454134399669876, "success_rate.epoch.global": 0.8345864661654135, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929078014184397, "tokens_p.mean_below_band": 3.583409124985337e-10, "tokens_p.mean_in_band": 0.7880859375, "tokens_rate.above_band": 0.94, "tokens_rate.below_band": 0.006666666666666667, "tokens_rate.in_band": 0.05333333333333334 }, { "epoch": 2.044098849595228, "grad_norm": 125.57216426470644, "learning_rate": 3.5987706830707355e-07, "loss": 0.3249, "step": 9595, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8301886792452831, "success_rate.epoch.env.math": 0.937984496124031, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6838709677419355, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9462332620219714, "success_rate.epoch.global": 0.8361858190709046, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9912790697674418, "tokens_p.mean_in_band": 0.760546875, "tokens_rate.above_band": 0.8958333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10416666666666667 }, { "epoch": 2.0451640391989776, "grad_norm": 80.00764951612555, "learning_rate": 3.5984292077104777e-07, "loss": 0.2917, "step": 9600, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9583333333333334, "success_rate.epoch.env.logic": 0.8148148148148148, "success_rate.epoch.env.math": 0.9384615384615385, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6770186335403726, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9444207563772781, "success_rate.epoch.global": 0.8305489260143198, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9995105421686747, "tokens_p.mean_in_band": 0.5015869140625, "tokens_rate.above_band": 0.962877030162413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037122969837587005 }, { "epoch": 2.046229228802727, "grad_norm": 89.76733159137095, "learning_rate": 3.598087733495002e-07, "loss": 0.3909, "step": 9605, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.96, "success_rate.epoch.env.logic": 0.8103448275862069, "success_rate.epoch.env.math": 0.9393939393939394, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6809815950920245, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.944610942006561, "success_rate.epoch.global": 0.8321678321678322, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970238095238095, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9910112359550561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008988764044943821 }, { "epoch": 2.0472944184064765, "grad_norm": 377.234369819399, "learning_rate": 3.597746260673172e-07, "loss": 0.2693, "step": 9610, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.8032786885245902, "success_rate.epoch.env.math": 0.9398496240601504, "success_rate.epoch.env.sat": 1.0, "success_rate.epoch.env.science": 0.6848484848484848, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9445013871792441, "success_rate.epoch.global": 0.8337129840546698, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9523809523809523, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979771627344223, "tokens_p.mean_in_band": 0.42367788461538464, "tokens_rate.above_band": 0.984514592019059, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015485407980941036 }, { "epoch": 2.0483596080102258, "grad_norm": 362.7960061936099, "learning_rate": 3.5974047894938513e-07, "loss": 0.3669, "step": 9615, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8125, "success_rate.epoch.env.math": 0.9407407407407408, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6826347305389222, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.899894403112966, "success_rate.epoch.global": 0.8329621380846325, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980849582172702, "tokens_p.mean_in_band": 0.5412946428571429, "tokens_rate.above_band": 0.9447368421052632, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05526315789473684 }, { "epoch": 2.0494247976139754, "grad_norm": 155.86362774108335, "learning_rate": 3.5970633202059017e-07, "loss": 0.313, "step": 9620, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8125, "success_rate.epoch.env.math": 0.9428571428571428, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6882352941176471, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9005959454488867, "success_rate.epoch.global": 0.8366013071895425, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99625, "tokens_p.mean_in_band": 0.7765625, "tokens_rate.above_band": 0.967741935483871, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03225806451612903 }, { "epoch": 2.0504899872177247, "grad_norm": 86.06508562645304, "learning_rate": 3.5967218530581826e-07, "loss": 0.3441, "step": 9625, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8153846153846154, "success_rate.epoch.env.math": 0.9440559440559441, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6988636363636364, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9019333780697416, "success_rate.epoch.global": 0.8400852878464818, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9925, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.0515551768214744, "grad_norm": 144.8050227408927, "learning_rate": 3.596380388299554e-07, "loss": 0.2577, "step": 9630, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8181818181818182, "success_rate.epoch.env.math": 0.9455782312925171, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.7055555555555556, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9029344152720775, "success_rate.epoch.global": 0.8434237995824635, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997564935064935, "tokens_p.mean_in_band": 0.82421875, "tokens_rate.above_band": 0.9956896551724138, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004310344827586207 }, { "epoch": 2.0526203664252236, "grad_norm": 68.7718047074938, "learning_rate": 3.5960389261788724e-07, "loss": 0.3167, "step": 9635, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8088235294117647, "success_rate.epoch.env.math": 0.9466666666666667, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.6994535519125683, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9016278828139966, "success_rate.epoch.global": 0.8404907975460123, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7083333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9959795321637427, "tokens_p.mean_in_band": 0.503125, "tokens_rate.above_band": 0.9447513812154696, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055248618784530384 }, { "epoch": 2.0536855560289733, "grad_norm": 54.50339168876519, "learning_rate": 3.595697466944992e-07, "loss": 0.2692, "step": 9640, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8088235294117647, "success_rate.epoch.env.math": 0.9473684210526315, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.7037037037037037, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9020780561028239, "success_rate.epoch.global": 0.8416833667334669, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915487421383647, "tokens_p.mean_in_band": 0.7017045454545454, "tokens_rate.above_band": 0.9352941176470588, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06470588235294118 }, { "epoch": 2.0547507456327225, "grad_norm": 34.004449514286705, "learning_rate": 3.5953560108467675e-07, "loss": 0.2095, "step": 9645, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.8142857142857143, "success_rate.epoch.env.math": 0.948051948051948, "success_rate.epoch.env.sat": 0.5, "success_rate.epoch.env.science": 0.7098445595854922, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.9031950168078289, "success_rate.epoch.global": 0.8447937131630648, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990131578947369, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9965034965034965, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0034965034965034965 }, { "epoch": 2.055815935236472, "grad_norm": 51.50414655273183, "learning_rate": 3.595014558133049e-07, "loss": 0.1324, "step": 9650, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9642857142857143, "success_rate.epoch.env.logic": 0.8194444444444444, "success_rate.epoch.env.math": 0.9490445859872612, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7091836734693877, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.888662886501831, "success_rate.epoch.global": 0.8439306358381503, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9920197469325154, "tokens_p.mean_in_band": 0.7576462765957447, "tokens_rate.above_band": 0.932761087267525, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06723891273247497 }, { "epoch": 2.0568811248402215, "grad_norm": 304.15525353544035, "learning_rate": 3.594673109052685e-07, "loss": 0.2903, "step": 9655, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9642857142857143, "success_rate.epoch.env.logic": 0.821917808219178, "success_rate.epoch.env.math": 0.9493670886075949, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7192118226600985, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8898287061005381, "success_rate.epoch.global": 0.8468809073724007, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969835907335908, "tokens_p.mean_in_band": 0.7788461538461539, "tokens_rate.above_band": 0.9522058823529411, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04779411764705882 }, { "epoch": 2.057946314443971, "grad_norm": 64.29470240299285, "learning_rate": 3.59433166385452e-07, "loss": 0.1572, "step": 9660, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9655172413793104, "success_rate.epoch.env.logic": 0.821917808219178, "success_rate.epoch.env.math": 0.95, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7285714285714285, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8908490737730227, "success_rate.epoch.global": 0.849721706864564, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968312937062938, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9930555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006944444444444444 }, { "epoch": 2.0590115040477204, "grad_norm": 48.02552504760577, "learning_rate": 3.593990222787398e-07, "loss": 0.178, "step": 9665, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9655172413793104, "success_rate.epoch.env.logic": 0.821917808219178, "success_rate.epoch.env.math": 0.9515151515151515, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7311320754716981, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8912196009016974, "success_rate.epoch.global": 0.8524590163934426, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973404255319149, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.06007669365147, "grad_norm": 58.02687871513042, "learning_rate": 3.5936487861001584e-07, "loss": 0.2438, "step": 9670, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9666666666666667, "success_rate.epoch.env.logic": 0.821917808219178, "success_rate.epoch.env.math": 0.9515151515151515, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7385321100917431, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.891996824529643, "success_rate.epoch.global": 0.8548387096774194, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998015873015873, "tokens_p.mean_in_band": 0.7240513392857143, "tokens_rate.above_band": 0.9818181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01818181818181818 }, { "epoch": 2.0611418832552193, "grad_norm": 55.31759833065296, "learning_rate": 3.5933073540416383e-07, "loss": 0.1866, "step": 9675, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9666666666666667, "success_rate.epoch.env.logic": 0.8266666666666667, "success_rate.epoch.env.math": 0.9520958083832335, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7443946188340808, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8930142812621802, "success_rate.epoch.global": 0.8573943661971831, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_p.mean_in_band": 0.763671875, "tokens_rate.above_band": 0.9629629629629629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037037037037037035 }, { "epoch": 2.062207072858969, "grad_norm": 104.55919875876411, "learning_rate": 3.59296592686067e-07, "loss": 0.2903, "step": 9680, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.967741935483871, "success_rate.epoch.env.logic": 0.8266666666666667, "success_rate.epoch.env.math": 0.9523809523809523, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7400881057268722, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8927464539628814, "success_rate.epoch.global": 0.856401384083045, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995547493403694, "tokens_p.mean_in_band": 0.6388671875, "tokens_rate.above_band": 0.9869791666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013020833333333334 }, { "epoch": 2.063272262462718, "grad_norm": 98.18511053990107, "learning_rate": 3.592624504806084e-07, "loss": 0.2499, "step": 9685, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.967741935483871, "success_rate.epoch.env.logic": 0.8289473684210527, "success_rate.epoch.env.math": 0.9532163742690059, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7402597402597403, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8930453410697275, "success_rate.epoch.global": 0.8571428571428571, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972222222222222, "tokens_p.mean_in_band": 0.6947544642857143, "tokens_rate.above_band": 0.9625668449197861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0374331550802139 }, { "epoch": 2.064337452066468, "grad_norm": 108.30435465743868, "learning_rate": 3.5922830881267054e-07, "loss": 0.1795, "step": 9690, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8311688311688312, "success_rate.epoch.env.math": 0.953757225433526, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7468354430379747, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8911449848157879, "success_rate.epoch.global": 0.8578595317725752, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9888572386058981, "tokens_p.mean_below_band": 8.307397365570068e-07, "tokens_p.mean_in_band": 0.48441972805343514, "tokens_rate.above_band": 0.7393458870168483, "tokens_rate.below_band": 0.0009910802775024777, "tokens_rate.in_band": 0.25966303270564917 }, { "epoch": 2.065402641670217, "grad_norm": 156.47234223738994, "learning_rate": 3.5919416770713567e-07, "loss": 0.2071, "step": 9695, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8311688311688312, "success_rate.epoch.env.math": 0.9540229885057471, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7551020408163265, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8919206539840218, "success_rate.epoch.global": 0.8601973684210527, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9897388059701493, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.066467831273967, "grad_norm": 66.73295862149546, "learning_rate": 3.591600271888857e-07, "loss": 0.2947, "step": 9700, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8395061728395061, "success_rate.epoch.env.math": 0.9542857142857143, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7590361445783133, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8930601240942607, "success_rate.epoch.global": 0.8622366288492707, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962993421052632, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9934640522875817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006535947712418301 }, { "epoch": 2.067533020877716, "grad_norm": 68.04145636664377, "learning_rate": 3.5912588728280177e-07, "loss": 0.0881, "step": 9705, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8395061728395061, "success_rate.epoch.env.math": 0.9553072625698324, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7598425196850394, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8932262989479738, "success_rate.epoch.global": 0.8628389154704944, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929552023121387, "tokens_p.mean_in_band": 0.6354166666666666, "tokens_rate.above_band": 0.9505494505494505, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04945054945054945 }, { "epoch": 2.0685982104814657, "grad_norm": 160.84378379603496, "learning_rate": 3.5909174801376493e-07, "loss": 0.2373, "step": 9710, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8414634146341463, "success_rate.epoch.env.math": 0.9567567567567568, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7626459143968871, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8937908562837386, "success_rate.epoch.global": 0.8649921507064364, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99, "tokens_p.mean_in_band": 0.80859375, "tokens_rate.above_band": 0.9803921568627451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0196078431372549 }, { "epoch": 2.069663400085215, "grad_norm": 126.90666648615168, "learning_rate": 3.590576094066556e-07, "loss": 0.2349, "step": 9715, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8433734939759037, "success_rate.epoch.env.math": 0.9574468085106383, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7615384615384615, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8939265543053033, "success_rate.epoch.global": 0.865533230293663, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967072564612326, "tokens_p.mean_in_band": 0.583984375, "tokens_rate.above_band": 0.9843444227005871, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015655577299412915 }, { "epoch": 2.0707285896889647, "grad_norm": 258.67673619265275, "learning_rate": 3.5902347148635376e-07, "loss": 0.1615, "step": 9720, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8452380952380952, "success_rate.epoch.env.math": 0.9576719576719577, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7565543071161048, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8936634266690447, "success_rate.epoch.global": 0.863013698630137, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.8928571428571428, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_in_band": 0.6501953125, "tokens_rate.above_band": 0.9224806201550387, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07751937984496124 }, { "epoch": 2.071793779292714, "grad_norm": 51.018949507965054, "learning_rate": 3.589893342777389e-07, "loss": 0.27, "step": 9725, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8409090909090909, "success_rate.epoch.env.math": 0.9581151832460733, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7555555555555555, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8932193784585504, "success_rate.epoch.global": 0.8618618618618619, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8055555555555555, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.9993043664383562, "tokens_p.mean_in_band": 0.4781901041666667, "tokens_rate.above_band": 0.9605263157894737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039473684210526314 }, { "epoch": 2.0728589688964636, "grad_norm": 34.658069142671394, "learning_rate": 3.5895519780568993e-07, "loss": 0.2078, "step": 9730, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.8461538461538461, "success_rate.epoch.env.math": 0.9591836734693877, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7555555555555555, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8939654861732784, "success_rate.epoch.global": 0.863905325443787, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965363300492611, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.9975429975429976, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002457002457002457 }, { "epoch": 2.073924158500213, "grad_norm": 128.17836059182127, "learning_rate": 3.589210620950853e-07, "loss": 0.3631, "step": 9735, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.8478260869565217, "success_rate.epoch.env.math": 0.9595959595959596, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7536231884057971, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8939793188805047, "success_rate.epoch.global": 0.8629737609329446, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9879807692307693, "tokens_p.mean_in_band": 0.59453125, "tokens_rate.above_band": 0.9122807017543859, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08771929824561403 }, { "epoch": 2.0749893481039625, "grad_norm": 403.3371051957635, "learning_rate": 3.588869271708029e-07, "loss": 0.0885, "step": 9740, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8494623655913979, "success_rate.epoch.env.math": 0.9601990049751243, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.7571428571428571, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8946649119664499, "success_rate.epoch.global": 0.8649425287356322, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996374709976799, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9953810623556582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004618937644341801 }, { "epoch": 2.076054537707712, "grad_norm": 31.23771291370937, "learning_rate": 3.5885279305772e-07, "loss": 0.2103, "step": 9745, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.851063829787234, "success_rate.epoch.env.math": 0.9603960396039604, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7614035087719299, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.883094531704669, "success_rate.epoch.global": 0.8640226628895185, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979020979020979, "tokens_p.mean_in_band": 0.6794181034482759, "tokens_rate.above_band": 0.9610215053763441, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038978494623655914 }, { "epoch": 2.0771197273114614, "grad_norm": 142.84453153763224, "learning_rate": 3.588186597807132e-07, "loss": 0.1762, "step": 9750, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.8541666666666666, "success_rate.epoch.env.math": 0.9607843137254902, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7647058823529411, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8837121212121212, "success_rate.epoch.global": 0.8659217877094972, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965193704600485, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.9975845410628019, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0024154589371980675 }, { "epoch": 2.078184916915211, "grad_norm": 212.249322311458, "learning_rate": 3.587845273646587e-07, "loss": 0.1883, "step": 9755, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8556701030927835, "success_rate.epoch.env.math": 0.9609756097560975, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7663230240549829, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8843102892134825, "success_rate.epoch.global": 0.8677685950413223, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9944381598793364, "tokens_p.mean_in_band": 0.69140625, "tokens_rate.above_band": 0.9977426636568849, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002257336343115124 }, { "epoch": 2.0792501065189604, "grad_norm": 33.88421265661674, "learning_rate": 3.587503958344319e-07, "loss": 0.1871, "step": 9760, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.8556701030927835, "success_rate.epoch.env.math": 0.9617224880382775, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7679180887372014, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.884659693255837, "success_rate.epoch.global": 0.8695652173913043, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989762090483619, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.08031529612271, "grad_norm": 275.80249510225406, "learning_rate": 3.5871626521490764e-07, "loss": 0.2488, "step": 9765, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8556701030927835, "success_rate.epoch.env.math": 0.9620853080568721, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7676767676767676, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8848000545344594, "success_rate.epoch.global": 0.8699731903485255, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992320415879017, "tokens_p.mean_in_band": 0.58359375, "tokens_rate.above_band": 0.9906367041198502, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009363295880149813 }, { "epoch": 2.0813804857264593, "grad_norm": 37.01370628622843, "learning_rate": 3.5868213553096006e-07, "loss": 0.1954, "step": 9770, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8484848484848485, "success_rate.epoch.env.math": 0.9627906976744186, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.770764119601329, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8844916442557479, "success_rate.epoch.global": 0.8703703703703703, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940580985915493, "tokens_p.mean_in_band": 0.2216796875, "tokens_rate.above_band": 0.993006993006993, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006993006993006993 }, { "epoch": 2.082445675330209, "grad_norm": 388.79164726042114, "learning_rate": 3.586480068074627e-07, "loss": 0.2549, "step": 9775, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8529411764705882, "success_rate.epoch.env.math": 0.9634703196347032, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7722772277227723, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8850961040800631, "success_rate.epoch.global": 0.8720626631853786, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99609375, "tokens_p.mean_in_band": 0.853515625, "tokens_rate.above_band": 0.9565217391304348, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043478260869565216 }, { "epoch": 2.083510864933958, "grad_norm": 99.21320677865037, "learning_rate": 3.586138790692882e-07, "loss": 0.2763, "step": 9780, "success_rate.epoch.env.abd": 1.0, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8529411764705882, "success_rate.epoch.env.math": 0.9641255605381166, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7719869706840391, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8851292844313977, "success_rate.epoch.global": 0.8724226804123711, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9999096820809249, "tokens_p.mean_in_band": 0.642578125, "tokens_rate.above_band": 0.9942528735632183, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005747126436781609 }, { "epoch": 2.084576054537708, "grad_norm": 145.39397899274596, "learning_rate": 3.5857975234130867e-07, "loss": 0.2596, "step": 9785, "success_rate.epoch.env.abd": 0.9767441860465116, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8543689320388349, "success_rate.epoch.env.math": 0.9646017699115044, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7741935483870968, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8835114895547178, "success_rate.epoch.global": 0.8727735368956743, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942540322580645, "tokens_p.mean_below_band": 3.741847144232856e-07, "tokens_p.mean_in_band": 0.10090948462537895, "tokens_rate.above_band": 0.21102791014295438, "tokens_rate.below_band": 0.0030633083730428863, "tokens_rate.in_band": 0.7859087814840027 }, { "epoch": 2.085641244141457, "grad_norm": 407.73000929703767, "learning_rate": 3.5854562664839547e-07, "loss": 0.2151, "step": 9790, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8584905660377359, "success_rate.epoch.env.math": 0.9647577092511013, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7770700636942676, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8811796074218587, "success_rate.epoch.global": 0.8731155778894473, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948308270676691, "tokens_p.mean_in_band": 0.6607142857142857, "tokens_rate.above_band": 0.9047619047619048, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09523809523809523 }, { "epoch": 2.086706433745207, "grad_norm": 210.247580059442, "learning_rate": 3.5851150201541906e-07, "loss": 0.1942, "step": 9795, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8598130841121495, "success_rate.epoch.env.math": 0.9650655021834061, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.778125, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8814237208138999, "success_rate.epoch.global": 0.8734491315136477, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982244318181818, "tokens_p.mean_in_band": 0.709765625, "tokens_rate.above_band": 0.9723756906077348, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027624309392265192 }, { "epoch": 2.087771623348956, "grad_norm": 91.59689440486994, "learning_rate": 3.5847737846724935e-07, "loss": 0.2954, "step": 9800, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8598130841121495, "success_rate.epoch.env.math": 0.9658119658119658, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7777777777777778, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8814600154872033, "success_rate.epoch.global": 0.8737745098039216, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978966346153846, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.9811320754716981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018867924528301886 }, { "epoch": 2.0888368129527057, "grad_norm": 53.24308318000804, "learning_rate": 3.584432560287552e-07, "loss": 0.2765, "step": 9805, "success_rate.epoch.env.abd": 0.9772727272727273, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.8611111111111112, "success_rate.epoch.env.math": 0.9661016949152542, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7784615384615384, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8816665170132043, "success_rate.epoch.global": 0.8753026634382567, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983013775130305, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9992559523809523, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.000744047619047619 }, { "epoch": 2.089902002556455, "grad_norm": 121.81374819834977, "learning_rate": 3.584091347248049e-07, "loss": 0.2179, "step": 9810, "success_rate.epoch.env.abd": 0.9777777777777777, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.8611111111111112, "success_rate.epoch.env.math": 0.9665271966527197, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7774390243902439, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8818855717085128, "success_rate.epoch.global": 0.8755980861244019, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0002246732026143, "tokens_p.mean_in_band": 0.6015625, "tokens_rate.above_band": 0.9986945169712794, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0013054830287206266 }, { "epoch": 2.0909671921602047, "grad_norm": 41.60592812384703, "learning_rate": 3.583750145802658e-07, "loss": 0.3349, "step": 9815, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.96, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.8611111111111112, "success_rate.epoch.env.math": 0.9666666666666667, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7801204819277109, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8785916220747815, "success_rate.epoch.global": 0.875886524822695, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990433673469388, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9865771812080537, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013422818791946308 }, { "epoch": 2.092032381763954, "grad_norm": 96.57248052582246, "learning_rate": 3.583408956200044e-07, "loss": 0.1948, "step": 9820, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.96, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.8623853211009175, "success_rate.epoch.env.math": 0.9673469387755103, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.781437125748503, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8790952571490854, "success_rate.epoch.global": 0.8773364485981309, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997455636743215, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9979166666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0020833333333333333 }, { "epoch": 2.0930975713677036, "grad_norm": 73.38948293390685, "learning_rate": 3.5830677786888634e-07, "loss": 0.4176, "step": 9825, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.8648648648648649, "success_rate.epoch.env.math": 0.964, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7797619047619048, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8790039703800219, "success_rate.epoch.global": 0.8764434180138568, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939563679245284, "tokens_p.mean_in_band": 0.3995615641276042, "tokens_rate.above_band": 0.9724770642201835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027522935779816515 }, { "epoch": 2.094162760971453, "grad_norm": 0.0, "learning_rate": 3.5827266135177634e-07, "loss": 0.2741, "step": 9830, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8648648648648649, "success_rate.epoch.env.math": 0.9644268774703557, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7807017543859649, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8792243167024624, "success_rate.epoch.global": 0.8767123287671232, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951716738197425, "tokens_p.mean_in_band": 0.7184244791666666, "tokens_rate.above_band": 0.9748953974895398, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02510460251046025 }, { "epoch": 2.0952279505752025, "grad_norm": 197.33323881507908, "learning_rate": 3.582385460935384e-07, "loss": 0.2579, "step": 9835, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8648648648648649, "success_rate.epoch.env.math": 0.96484375, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7809798270893372, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8794169936713278, "success_rate.epoch.global": 0.8769751693002258, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971217105263158, "tokens_p.mean_in_band": 0.39453125, "tokens_rate.above_band": 0.9956331877729258, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004366812227074236 }, { "epoch": 2.0962931401789517, "grad_norm": 141.31835519757158, "learning_rate": 3.5820443211903545e-07, "loss": 0.2315, "step": 9840, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.8648648648648649, "success_rate.epoch.env.math": 0.9652509652509652, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7806267806267806, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776137935712403, "success_rate.epoch.global": 0.8761160714285714, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924395161290323, "tokens_p.mean_in_band": 0.6625, "tokens_rate.above_band": 0.8920863309352518, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1079136690647482 }, { "epoch": 2.0973583297827014, "grad_norm": 0.0, "learning_rate": 3.581703194531294e-07, "loss": 0.3037, "step": 9845, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.8660714285714286, "success_rate.epoch.env.math": 0.9653846153846154, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7849162011173184, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8781255785103995, "success_rate.epoch.global": 0.8774834437086093, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.8560267857142857, "tokens_rate.above_band": 0.9625668449197861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0374331550802139 }, { "epoch": 2.0984235193864507, "grad_norm": 78.01096507401617, "learning_rate": 3.581362081206814e-07, "loss": 0.2301, "step": 9850, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8672566371681416, "success_rate.epoch.env.math": 0.9657794676806084, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7845303867403315, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8783658986811304, "success_rate.epoch.global": 0.8777292576419214, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976924587588374, "tokens_p.mean_in_band": 0.7083333333333334, "tokens_rate.above_band": 0.9906614785992218, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00933852140077821 }, { "epoch": 2.0994887089902003, "grad_norm": 103.19034151426482, "learning_rate": 3.581020981465515e-07, "loss": 0.3427, "step": 9855, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8672566371681416, "success_rate.epoch.env.math": 0.9661654135338346, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7859078590785907, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8785262094258107, "success_rate.epoch.global": 0.8779697624190065, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921016483516484, "tokens_p.mean_in_band": 0.7278645833333334, "tokens_rate.above_band": 0.91, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09 }, { "epoch": 2.1005538985939496, "grad_norm": 308.68351450125414, "learning_rate": 3.5806798955559886e-07, "loss": 0.2511, "step": 9860, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9347826086956522, "success_rate.epoch.env.logic": 0.8695652173913043, "success_rate.epoch.env.math": 0.9662921348314607, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.786096256684492, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8759508747143844, "success_rate.epoch.global": 0.8771367521367521, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994179799426934, "tokens_p.mean_in_band": 0.49441964285714285, "tokens_rate.above_band": 0.9900709219858156, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009929078014184398 }, { "epoch": 2.1016190881976993, "grad_norm": 314.3046116266596, "learning_rate": 3.5803388237268156e-07, "loss": 0.2412, "step": 9865, "success_rate.epoch.env.abd": 0.9787234042553191, "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8695652173913043, "success_rate.epoch.env.math": 0.9666666666666667, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7857142857142857, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8761972340025068, "success_rate.epoch.global": 0.8773784355179705, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996099695585997, "tokens_p.mean_in_band": 0.54453125, "tokens_rate.above_band": 0.9924471299093656, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0075528700906344415 }, { "epoch": 2.1026842778014485, "grad_norm": 50.924108583305674, "learning_rate": 3.5799977662265666e-07, "loss": 0.258, "step": 9870, "success_rate.epoch.env.abd": 0.9791666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.9333333333333333, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8571428571428571, "success_rate.epoch.env.math": 0.967032967032967, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7842105263157895, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8750048197416619, "success_rate.epoch.global": 0.8755230125523012, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0000480030721965, "tokens_p.mean_in_band": 0.5639105902777778, "tokens_rate.above_band": 0.9730941704035875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026905829596412557 }, { "epoch": 2.103749467405198, "grad_norm": 84.85979172482334, "learning_rate": 3.5796567233038016e-07, "loss": 0.1677, "step": 9875, "success_rate.epoch.env.abd": 0.9791666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.8583333333333333, "success_rate.epoch.env.math": 0.9676258992805755, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7859007832898173, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8755161109277091, "success_rate.epoch.global": 0.8768115942028986, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977609034267912, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.9968944099378882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003105590062111801 }, { "epoch": 2.1048146570089474, "grad_norm": 129.0968070491486, "learning_rate": 3.5793156952070705e-07, "loss": 0.1709, "step": 9880, "success_rate.epoch.env.abd": 0.98, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.860655737704918, "success_rate.epoch.env.math": 0.9678571428571429, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7875647668393783, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8759752895487135, "success_rate.epoch.global": 0.8780737704918032, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978197674418605, "tokens_p.mean_in_band": 0.8916015625, "tokens_rate.above_band": 0.9772727272727273, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022727272727272728 }, { "epoch": 2.105879846612697, "grad_norm": 78.38916540916124, "learning_rate": 3.5789746821849127e-07, "loss": 0.293, "step": 9885, "success_rate.epoch.env.abd": 0.98, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.864, "success_rate.epoch.env.math": 0.9680851063829787, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7846153846153846, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8761478671669867, "success_rate.epoch.global": 0.8772819472616633, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9959112149532711, "tokens_p.mean_in_band": 0.44921875, "tokens_rate.above_band": 0.9304347826086956, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06956521739130435 }, { "epoch": 2.1069450362164464, "grad_norm": 64.62336526775415, "learning_rate": 3.5786336844858546e-07, "loss": 0.2072, "step": 9890, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.8650793650793651, "success_rate.epoch.env.math": 0.9683098591549296, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7848101265822784, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8763540572203902, "success_rate.epoch.global": 0.8775100401606426, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968913612565445, "tokens_p.mean_in_band": 0.5032552083333334, "tokens_rate.above_band": 0.9845360824742269, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015463917525773196 }, { "epoch": 2.108010225820196, "grad_norm": 0.0, "learning_rate": 3.5782927023584136e-07, "loss": 0.2612, "step": 9895, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.9354838709677419, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9387755102040817, "success_rate.epoch.env.logic": 0.859375, "success_rate.epoch.env.math": 0.9685314685314685, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7880299251870324, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8761483338478382, "success_rate.epoch.global": 0.8777335984095428, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9913563829787234, "tokens_p.mean_in_band": 0.6927083333333334, "tokens_rate.above_band": 0.912621359223301, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08737864077669903 }, { "epoch": 2.1090754154239453, "grad_norm": 88.72607914915754, "learning_rate": 3.5779517360510954e-07, "loss": 0.4011, "step": 9900, "success_rate.epoch.env.abd": 0.9807692307692307, "success_rate.epoch.env.agentgym:alfworld": 0.9375, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9423076923076923, "success_rate.epoch.env.logic": 0.8604651162790697, "success_rate.epoch.env.math": 0.96875, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7866004962779156, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8766417456636887, "success_rate.epoch.global": 0.8779527559055118, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988386824324325, "tokens_p.mean_in_band": 0.64990234375, "tokens_rate.above_band": 0.9946236559139785, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005376344086021506 }, { "epoch": 2.110140605027695, "grad_norm": 160.4988556227826, "learning_rate": 3.577610785812394e-07, "loss": 0.2467, "step": 9905, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9375, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.8625954198473282, "success_rate.epoch.env.math": 0.9689655172413794, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7881773399014779, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.877225587597545, "success_rate.epoch.global": 0.8791423001949318, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997301479468599, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.111205794631444, "grad_norm": 450.5338541198487, "learning_rate": 3.57726985189079e-07, "loss": 0.2826, "step": 9910, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9393939393939394, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8646616541353384, "success_rate.epoch.env.math": 0.9691780821917808, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7892156862745098, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8778796977784094, "success_rate.epoch.global": 0.8803088803088803, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997320732657833, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.9992211838006231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.000778816199376947 }, { "epoch": 2.112270984235194, "grad_norm": 23.190480883870144, "learning_rate": 3.576928934534756e-07, "loss": 0.2372, "step": 9915, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8656716417910447, "success_rate.epoch.env.math": 0.9693877551020408, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7912621359223301, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8784914535672268, "success_rate.epoch.global": 0.8814531548757171, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973997028231798, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9985163204747775, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001483679525222552 }, { "epoch": 2.113336173838943, "grad_norm": 69.2132976853282, "learning_rate": 3.5765880339927475e-07, "loss": 0.3353, "step": 9920, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9464285714285714, "success_rate.epoch.env.logic": 0.8666666666666667, "success_rate.epoch.env.math": 0.9693877551020408, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7933491686460807, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8787716406217151, "success_rate.epoch.global": 0.8816287878787878, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933035714285714, "tokens_p.mean_in_band": 0.4966517857142857, "tokens_rate.above_band": 0.9333333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06666666666666667 }, { "epoch": 2.114401363442693, "grad_norm": 46.075415051842846, "learning_rate": 3.5762471505132125e-07, "loss": 0.2824, "step": 9925, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9428571428571428, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.8676470588235294, "success_rate.epoch.env.math": 0.9693877551020408, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7868852459016393, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8784209520201665, "success_rate.epoch.global": 0.8789868667917449, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9966744087837838, "tokens_p.mean_in_band": 0.4152644230769231, "tokens_rate.above_band": 0.9579288025889967, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042071197411003236 }, { "epoch": 2.115466553046442, "grad_norm": 76.85179740366831, "learning_rate": 3.575906284344583e-07, "loss": 0.2347, "step": 9930, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9491525423728814, "success_rate.epoch.env.logic": 0.8686131386861314, "success_rate.epoch.env.math": 0.9697986577181208, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7878787878787878, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8789429472350193, "success_rate.epoch.global": 0.8801115241635687, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995449029126213, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.1165317426501917, "grad_norm": 452.0678086516589, "learning_rate": 3.57556543573528e-07, "loss": 0.3612, "step": 9935, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8714285714285714, "success_rate.epoch.env.math": 0.9701986754966887, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.7878787878787878, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8793123025212128, "success_rate.epoch.global": 0.8812154696132597, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997874149659864, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9988674971687429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0011325028312570782 }, { "epoch": 2.1175969322539414, "grad_norm": 158.39559236936503, "learning_rate": 3.5752246049337125e-07, "loss": 0.2274, "step": 9940, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.8741258741258742, "success_rate.epoch.env.math": 0.9702970297029703, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.789838337182448, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776096077687033, "success_rate.epoch.global": 0.8813868613138686, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965753424657534, "tokens_p.mean_in_band": 0.6015625, "tokens_rate.above_band": 0.9647577092511013, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03524229074889868 }, { "epoch": 2.1186621218576907, "grad_norm": 78.70995162916155, "learning_rate": 3.5748837921882747e-07, "loss": 0.2829, "step": 9945, "success_rate.epoch.env.abd": 0.9824561403508771, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8758620689655172, "success_rate.epoch.env.math": 0.9707792207792207, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7903225806451613, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8779582972885009, "success_rate.epoch.global": 0.8824593128390597, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973821989528796, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9982578397212544, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0017421602787456446 }, { "epoch": 2.1197273114614403, "grad_norm": 115.78725251556781, "learning_rate": 3.5745429977473487e-07, "loss": 0.3258, "step": 9950, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.9444444444444444, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8775510204081632, "success_rate.epoch.env.math": 0.9709677419354839, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7881548974943052, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8779594127236674, "success_rate.epoch.global": 0.8817204301075269, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994140625, "tokens_p.mean_in_band": 0.466796875, "tokens_rate.above_band": 0.975609756097561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024390243902439025 }, { "epoch": 2.1207925010651896, "grad_norm": 63.8070756453753, "learning_rate": 3.574202221859303e-07, "loss": 0.2211, "step": 9955, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.9459459459459459, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8733333333333333, "success_rate.epoch.env.math": 0.9710610932475884, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7891156462585034, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8778083140421196, "success_rate.epoch.global": 0.8818827708703375, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973290598290598, "tokens_p.mean_in_band": 0.7256433823529411, "tokens_rate.above_band": 0.9856781802864364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014321819713563605 }, { "epoch": 2.1218576906689393, "grad_norm": 195.56920867765484, "learning_rate": 3.573861464772492e-07, "loss": 0.3186, "step": 9960, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8741721854304636, "success_rate.epoch.env.math": 0.9714285714285714, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7882882882882883, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8779986480587157, "success_rate.epoch.global": 0.8820422535211268, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966104497354498, "tokens_p.mean_in_band": 0.73203125, "tokens_rate.above_band": 0.9869451697127938, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013054830287206266 }, { "epoch": 2.1229228802726885, "grad_norm": 119.32577809880027, "learning_rate": 3.573520726735258e-07, "loss": 0.265, "step": 9965, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.875, "success_rate.epoch.env.math": 0.9715189873417721, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7884187082405345, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.878242344783807, "success_rate.epoch.global": 0.8821989528795812, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9666666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983742774566474, "tokens_p.mean_in_band": 0.6294642857142857, "tokens_rate.above_band": 0.9801699716713881, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019830028328611898 }, { "epoch": 2.123988069876438, "grad_norm": 36.09824604465382, "learning_rate": 3.573180007995928e-07, "loss": 0.0905, "step": 9970, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8774193548387097, "success_rate.epoch.env.math": 0.9719626168224299, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7888888888888889, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8785453597808729, "success_rate.epoch.global": 0.8832179930795848, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993206521739131, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9928057553956835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007194244604316547 }, { "epoch": 2.1250532594801874, "grad_norm": 87.4942570720097, "learning_rate": 3.5728393088028163e-07, "loss": 0.3072, "step": 9975, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.8782051282051282, "success_rate.epoch.env.math": 0.9691358024691358, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7912087912087912, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.878570710811148, "success_rate.epoch.global": 0.8833619210977701, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0002414459161149, "tokens_p.mean_in_band": 0.32734375, "tokens_rate.above_band": 0.9945115257958288, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005488474204171241 }, { "epoch": 2.126118449083937, "grad_norm": 120.08776235586902, "learning_rate": 3.57249862940422e-07, "loss": 0.2397, "step": 9980, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.879746835443038, "success_rate.epoch.env.math": 0.9694189602446484, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7925764192139738, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8764077968416466, "success_rate.epoch.global": 0.8835034013605442, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978777066929134, "tokens_p.mean_below_band": 5.20230969414115e-10, "tokens_p.mean_in_band": 0.8080357142857143, "tokens_rate.above_band": 0.9921875, "tokens_rate.below_band": 0.0009765625, "tokens_rate.in_band": 0.0068359375 }, { "epoch": 2.1271836386876863, "grad_norm": 28.93575935653819, "learning_rate": 3.5721579700484256e-07, "loss": 0.252, "step": 9985, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8819875776397516, "success_rate.epoch.env.math": 0.9697885196374623, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7934782608695652, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8767969052416708, "success_rate.epoch.global": 0.8844856661045531, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.128248828291436, "grad_norm": 102.13109463315611, "learning_rate": 3.571817330983703e-07, "loss": 0.3414, "step": 9990, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8827160493827161, "success_rate.epoch.env.math": 0.9700598802395209, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7939914163090128, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8769344495857134, "success_rate.epoch.global": 0.8846153846153846, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953703703703703, "tokens_p.mean_in_band": 0.5611979166666666, "tokens_rate.above_band": 0.972972972972973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02702702702702703 }, { "epoch": 2.1293140178951853, "grad_norm": 123.16740415066614, "learning_rate": 3.5714767124583063e-07, "loss": 0.2379, "step": 9995, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8841463414634146, "success_rate.epoch.env.math": 0.9703264094955489, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7914893617021277, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8768860851105824, "success_rate.epoch.global": 0.8839137645107794, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970079787234043, "tokens_p.mean_in_band": 0.5575284090909091, "tokens_rate.above_band": 0.8103448275862069, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1896551724137931 }, { "epoch": 2.130379207498935, "grad_norm": 256.25816493493846, "learning_rate": 3.5711361147204767e-07, "loss": 0.2942, "step": 10000, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8848484848484849, "success_rate.epoch.env.math": 0.9705014749262537, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7932489451476793, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8772407457133379, "success_rate.epoch.global": 0.8848684210526315, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996654175588865, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.131444397102684, "grad_norm": 77.31242397726325, "learning_rate": 3.570795538018439e-07, "loss": 0.3695, "step": 10005, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8855421686746988, "success_rate.epoch.env.math": 0.9710144927536232, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7941176470588235, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8774519656622236, "success_rate.epoch.global": 0.8858075040783034, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953271028037384, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.132509586706434, "grad_norm": 88.03783538102049, "learning_rate": 3.570454982600404e-07, "loss": 0.2477, "step": 10010, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8869047619047619, "success_rate.epoch.env.math": 0.9712643678160919, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7958333333333333, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777545251683182, "success_rate.epoch.global": 0.8867313915857605, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978373702422145, "tokens_p.mean_in_band": 0.79296875, "tokens_rate.above_band": 0.996551724137931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0034482758620689655 }, { "epoch": 2.133574776310183, "grad_norm": 44.934618978991544, "learning_rate": 3.5701144487145644e-07, "loss": 0.2371, "step": 10015, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.95, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.888235294117647, "success_rate.epoch.env.math": 0.9713467048710601, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.7942386831275721, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8778545497450586, "success_rate.epoch.global": 0.8860353130016051, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974609375, "tokens_p.mean_in_band": 0.6341145833333334, "tokens_rate.above_band": 0.9467455621301775, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05325443786982249 }, { "epoch": 2.134639965913933, "grad_norm": 35.167712395122784, "learning_rate": 3.569773936609101e-07, "loss": 0.2102, "step": 10020, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9538461538461539, "success_rate.epoch.env.logic": 0.8888888888888888, "success_rate.epoch.env.math": 0.9715099715099715, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7955010224948875, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8767022010311888, "success_rate.epoch.global": 0.8861464968152867, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969457687723481, "tokens_p.mean_in_band": 0.5036892361111112, "tokens_rate.above_band": 0.9789964994165694, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021003500583430573 }, { "epoch": 2.135705155517682, "grad_norm": 100.36409304806315, "learning_rate": 3.569433446532175e-07, "loss": 0.319, "step": 10025, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9538461538461539, "success_rate.epoch.env.logic": 0.8895348837209303, "success_rate.epoch.env.math": 0.971830985915493, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7955465587044535, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8767942506172914, "success_rate.epoch.global": 0.8862559241706162, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994375, "tokens_p.mean_in_band": 0.6436941964285714, "tokens_rate.above_band": 0.9345794392523364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06542056074766354 }, { "epoch": 2.1367703451214317, "grad_norm": 190.31628250345724, "learning_rate": 3.569092978731933e-07, "loss": 0.2563, "step": 10030, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.8926553672316384, "success_rate.epoch.env.math": 0.971830985915493, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7943548387096774, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8770550186928022, "success_rate.epoch.global": 0.8863636363636364, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998180494905385, "tokens_p.mean_in_band": 0.466796875, "tokens_rate.above_band": 0.9970972423802612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002902757619738752 }, { "epoch": 2.137835534725181, "grad_norm": 147.1014082681251, "learning_rate": 3.5687525334565063e-07, "loss": 0.2763, "step": 10035, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.8926553672316384, "success_rate.epoch.env.math": 0.9722222222222222, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.794, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8771200026608578, "success_rate.epoch.global": 0.8864696734059098, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921343537414966, "tokens_p.mean_in_band": 0.5927734375, "tokens_rate.above_band": 0.9865771812080537, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013422818791946308 }, { "epoch": 2.1389007243289306, "grad_norm": 168.98905659793235, "learning_rate": 3.568412110954009e-07, "loss": 0.2934, "step": 10040, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.8932584269662921, "success_rate.epoch.env.math": 0.9723756906077348, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7944664031620553, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8772910393541653, "success_rate.epoch.global": 0.8865740740740741, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957298136645962, "tokens_p.mean_in_band": 0.62421875, "tokens_rate.above_band": 0.9698795180722891, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030120481927710843 }, { "epoch": 2.13996591393268, "grad_norm": 68.63278084097718, "learning_rate": 3.5680717114725375e-07, "loss": 0.2753, "step": 10045, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.8950276243093923, "success_rate.epoch.env.math": 0.9726775956284153, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7956777996070727, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8775894483367833, "success_rate.epoch.global": 0.8874425727411945, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971590909090909, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9871794871794872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01282051282051282 }, { "epoch": 2.1410311035364296, "grad_norm": 125.97507517372232, "learning_rate": 3.567731335260174e-07, "loss": 0.2266, "step": 10050, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.8956043956043956, "success_rate.epoch.env.math": 0.9728260869565217, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.797270955165692, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8779265897114867, "success_rate.epoch.global": 0.8882978723404256, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985431235431236, "tokens_p.mean_in_band": 0.69921875, "tokens_rate.above_band": 0.9839449541284404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016055045871559634 }, { "epoch": 2.142096293140179, "grad_norm": 50.06901825201844, "learning_rate": 3.567390982564981e-07, "loss": 0.3495, "step": 10055, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9571428571428572, "success_rate.epoch.env.logic": 0.8956043956043956, "success_rate.epoch.env.math": 0.9703504043126685, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7953667953667953, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776060784208297, "success_rate.epoch.global": 0.8868778280542986, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971698113207547, "tokens_p.mean_below_band": 2.2118911147117615e-08, "tokens_p.mean_in_band": 0.523162841796875, "tokens_rate.above_band": 0.9397163120567376, "tokens_rate.below_band": 0.0035460992907801418, "tokens_rate.in_band": 0.05673758865248227 }, { "epoch": 2.1431614827439285, "grad_norm": 113.59391973124605, "learning_rate": 3.5670506536350055e-07, "loss": 0.2315, "step": 10060, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8961748633879781, "success_rate.epoch.env.math": 0.9707446808510638, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7946257197696737, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8776812866440331, "success_rate.epoch.global": 0.8869760479041916, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977156432748538, "tokens_p.mean_in_band": 0.7194010416666666, "tokens_rate.above_band": 0.9827586206896551, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017241379310344827 }, { "epoch": 2.1442266723476777, "grad_norm": 491.9637640602059, "learning_rate": 3.5667103487182774e-07, "loss": 0.3562, "step": 10065, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8918918918918919, "success_rate.epoch.env.math": 0.9709762532981531, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7934990439770554, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8772957688921162, "success_rate.epoch.global": 0.8863298662704309, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.000812567713976, "tokens_p.mean_in_band": 0.4486607142857143, "tokens_rate.above_band": 0.9777542372881356, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022245762711864406 }, { "epoch": 2.1452918619514274, "grad_norm": 163.9797965599951, "learning_rate": 3.5663700680628075e-07, "loss": 0.306, "step": 10070, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8924731182795699, "success_rate.epoch.env.math": 0.9712793733681462, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7950664136622391, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8775386058852294, "success_rate.epoch.global": 0.8871681415929203, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976851851851852, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.1463570515551766, "grad_norm": 49.16048371659576, "learning_rate": 3.5660298119165906e-07, "loss": 0.1745, "step": 10075, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.893048128342246, "success_rate.epoch.env.math": 0.9717223650385605, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.7958412098298677, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8777629296791726, "success_rate.epoch.global": 0.8879941434846267, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965796019900498, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9901477832512315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009852216748768473 }, { "epoch": 2.1474222411589263, "grad_norm": 76.5793998030311, "learning_rate": 3.565689580527602e-07, "loss": 0.2279, "step": 10080, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8936170212765957, "success_rate.epoch.env.math": 0.9720101781170484, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.795880149812734, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8778443520424184, "success_rate.epoch.global": 0.8880813953488372, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957107843137255, "tokens_p.mean_in_band": 0.708984375, "tokens_rate.above_band": 0.9745222929936306, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025477707006369428 }, { "epoch": 2.1484874307626756, "grad_norm": 162.09354830423786, "learning_rate": 3.565349374143801e-07, "loss": 0.345, "step": 10085, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.9743589743589743, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8947368421052632, "success_rate.epoch.env.math": 0.9720101781170484, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7981481481481482, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8870100947802416, "success_rate.epoch.global": 0.8888888888888888, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996895032051282, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9889064976228209, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011093502377179081 }, { "epoch": 2.1495526203664252, "grad_norm": 78.51081522612341, "learning_rate": 3.5650091930131275e-07, "loss": 0.4265, "step": 10090, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8958333333333334, "success_rate.epoch.env.math": 0.9722222222222222, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7977941176470589, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8871551430050763, "success_rate.epoch.global": 0.8889684813753582, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983874555160143, "tokens_p.mean_in_band": 0.66796875, "tokens_rate.above_band": 0.9808027923211169, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019197207678883072 }, { "epoch": 2.1506178099701745, "grad_norm": 109.23803165405054, "learning_rate": 3.564669037383502e-07, "loss": 0.241, "step": 10095, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8963730569948186, "success_rate.epoch.env.math": 0.972568578553616, "success_rate.epoch.env.sat": 0.2222222222222222, "success_rate.epoch.env.science": 0.7992700729927007, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8873698734903964, "success_rate.epoch.global": 0.8897581792318634, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959239130434783, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9484536082474226, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05154639175257732 }, { "epoch": 2.151682999573924, "grad_norm": 97.71526801625657, "learning_rate": 3.564328907502829e-07, "loss": 0.3166, "step": 10100, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.8974358974358975, "success_rate.epoch.env.math": 0.9727047146401985, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.7992766726943942, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8854592693092265, "success_rate.epoch.global": 0.8891242937853108, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9917091836734694, "tokens_p.mean_in_band": 0.5336538461538461, "tokens_rate.above_band": 0.8497109826589595, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15028901734104047 }, { "epoch": 2.1527481891776734, "grad_norm": 11.688365148775938, "learning_rate": 3.5639888036189906e-07, "loss": 0.2732, "step": 10105, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.898989898989899, "success_rate.epoch.env.math": 0.9728395061728395, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.8010752688172043, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.885776304691904, "success_rate.epoch.global": 0.8899018232819075, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9944196428571429, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9790209790209791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02097902097902098 }, { "epoch": 2.153813378781423, "grad_norm": 424.5865547018612, "learning_rate": 3.5636487259798545e-07, "loss": 0.3279, "step": 10110, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9577464788732394, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9731051344743277, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.8021390374331551, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8860078080796328, "success_rate.epoch.global": 0.8906685236768802, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9921875, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.1548785683851728, "grad_norm": 94.85832224596984, "learning_rate": 3.563308674833265e-07, "loss": 0.3266, "step": 10115, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:sciworld": 0.975, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9583333333333334, "success_rate.epoch.env.logic": 0.900497512437811, "success_rate.epoch.env.math": 0.9731051344743277, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8024691358024691, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8845841777884936, "success_rate.epoch.global": 0.8900414937759336, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979440789473685, "tokens_p.mean_in_band": 0.623095703125, "tokens_rate.above_band": 0.9785407725321889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02145922746781116 }, { "epoch": 2.155943757988922, "grad_norm": 93.90254543878595, "learning_rate": 3.5629686504270506e-07, "loss": 0.1606, "step": 10120, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9583333333333334, "success_rate.epoch.env.logic": 0.9014778325123153, "success_rate.epoch.env.math": 0.973170731707317, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8038528896672504, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8849748793308326, "success_rate.epoch.global": 0.8907967032967034, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980502599653379, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.1570089475926717, "grad_norm": 624.0064901894963, "learning_rate": 3.562628653009017e-07, "loss": 0.4398, "step": 10125, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.958904109589041, "success_rate.epoch.env.logic": 0.8975609756097561, "success_rate.epoch.env.math": 0.9732360097323601, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8034782608695652, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.882845945033624, "success_rate.epoch.global": 0.8894952251023193, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.997885687732342, "tokens_p.mean_in_band": 0.5966796875, "tokens_rate.above_band": 0.9853479853479854, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014652014652014652 }, { "epoch": 2.158074137196421, "grad_norm": 38.12365135895893, "learning_rate": 3.5622886828269516e-07, "loss": 0.142, "step": 10130, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.958904109589041, "success_rate.epoch.env.logic": 0.8985507246376812, "success_rate.epoch.env.math": 0.9733656174334141, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.803448275862069, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8829449788264859, "success_rate.epoch.global": 0.8895663956639567, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975341426403642, "tokens_p.mean_in_band": 0.7297585227272727, "tokens_rate.above_band": 0.9835820895522388, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016417910447761194 }, { "epoch": 2.1591393268001706, "grad_norm": 220.53278906205063, "learning_rate": 3.561948740128625e-07, "loss": 0.3503, "step": 10135, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9347826086956522, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.96, "success_rate.epoch.env.logic": 0.8995215311004785, "success_rate.epoch.env.math": 0.9734939759036144, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8047945205479452, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8832669151928344, "success_rate.epoch.global": 0.8903095558546433, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0001228380503144, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9945269741985927, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00547302580140735 }, { "epoch": 2.16020451640392, "grad_norm": 112.18080955828253, "learning_rate": 3.561608825161782e-07, "loss": 0.2632, "step": 10140, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9605263157894737, "success_rate.epoch.env.logic": 0.9014084507042254, "success_rate.epoch.env.math": 0.973621103117506, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8037542662116041, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8815951985197795, "success_rate.epoch.global": 0.8897058823529411, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986103192702395, "tokens_p.mean_in_band": 0.5463169642857143, "tokens_rate.above_band": 0.9920814479638009, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007918552036199095 }, { "epoch": 2.1612697060076695, "grad_norm": 144.60601412089292, "learning_rate": 3.561268938174151e-07, "loss": 0.241, "step": 10145, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.9027777777777778, "success_rate.epoch.env.math": 0.9737470167064439, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8047538200339559, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8819140112683095, "success_rate.epoch.global": 0.8904382470119522, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993169398907104, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.997275204359673, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0027247956403269754 }, { "epoch": 2.162334895611419, "grad_norm": 102.5551948515419, "learning_rate": 3.56092907941344e-07, "loss": 0.2562, "step": 10150, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.975609756097561, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.9032258064516129, "success_rate.epoch.env.math": 0.9741176470588235, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8057432432432432, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.882078382380628, "success_rate.epoch.global": 0.8911609498680739, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9898174157303371, "tokens_p.mean_in_band": 0.748046875, "tokens_rate.above_band": 0.9175257731958762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08247422680412371 }, { "epoch": 2.1634000852151685, "grad_norm": 98.02656807287734, "learning_rate": 3.560589249127335e-07, "loss": 0.1623, "step": 10155, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9615384615384616, "success_rate.epoch.env.logic": 0.9032258064516129, "success_rate.epoch.env.math": 0.9744186046511628, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8070469798657718, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8822770564086082, "success_rate.epoch.global": 0.8918741808650066, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967177242888403, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9978165938864629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002183406113537118 }, { "epoch": 2.1644652748189177, "grad_norm": 102.3557858066811, "learning_rate": 3.5602494475635026e-07, "loss": 0.1352, "step": 10160, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9761904761904762, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.9041095890410958, "success_rate.epoch.env.math": 0.9747126436781609, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8076923076923077, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8824870568049946, "success_rate.epoch.global": 0.892578125, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967296511627907, "tokens_p.mean_in_band": 0.8510044642857143, "tokens_rate.above_band": 0.98005698005698, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019943019943019943 }, { "epoch": 2.1655304644226674, "grad_norm": 145.79749909634245, "learning_rate": 3.559909674969587e-07, "loss": 0.2773, "step": 10165, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.9049773755656109, "success_rate.epoch.env.math": 0.9749430523917996, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8086522462562395, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8827244971375513, "success_rate.epoch.global": 0.8932729624838293, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974291590493601, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9963570127504554, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0036429872495446266 }, { "epoch": 2.1665956540264166, "grad_norm": 153.5381911864979, "learning_rate": 3.5595699315932114e-07, "loss": 0.3574, "step": 10170, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.9054054054054054, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.18181818181818182, "success_rate.epoch.env.science": 0.8108552631578947, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8829866437287343, "success_rate.epoch.global": 0.8939588688946015, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961222627737226, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9927536231884058, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007246376811594203 }, { "epoch": 2.1676608436301663, "grad_norm": 148.2431649183385, "learning_rate": 3.5592302176819803e-07, "loss": 0.2617, "step": 10175, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9620253164556962, "success_rate.epoch.env.logic": 0.90625, "success_rate.epoch.env.math": 0.9752252252252253, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8117839607201309, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8817909166587833, "success_rate.epoch.global": 0.8939974457215837, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967672413793104, "tokens_p.mean_in_band": 0.6964285714285714, "tokens_rate.above_band": 0.9119496855345912, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0880503144654088 }, { "epoch": 2.1687260332339156, "grad_norm": 243.756584530022, "learning_rate": 3.558890533483473e-07, "loss": 0.2382, "step": 10180, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9625, "success_rate.epoch.env.logic": 0.9070796460176991, "success_rate.epoch.env.math": 0.9752808988764045, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8133116883116883, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8820707338912055, "success_rate.epoch.global": 0.8946700507614214, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982763788968825, "tokens_p.mean_in_band": 0.806640625, "tokens_rate.above_band": 0.9904988123515439, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009501187648456057 }, { "epoch": 2.1697912228376652, "grad_norm": 109.96009841477199, "learning_rate": 3.558550879245249e-07, "loss": 0.2608, "step": 10185, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.9148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9074889867841409, "success_rate.epoch.env.math": 0.9754464285714286, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8129032258064516, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8821447782132661, "success_rate.epoch.global": 0.894703656998739, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995181718061674, "tokens_p.mean_in_band": 0.7135416666666666, "tokens_rate.above_band": 0.9869565217391304, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013043478260869565 }, { "epoch": 2.1708564124414145, "grad_norm": 93.62406945200333, "learning_rate": 3.558211255214847e-07, "loss": 0.2817, "step": 10190, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9767441860465116, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9082969432314411, "success_rate.epoch.env.math": 0.9755555555555555, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8138041733547352, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8824876201046551, "success_rate.epoch.global": 0.8953634085213033, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984800583657587, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9961240310077519, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003875968992248062 }, { "epoch": 2.171921602045164, "grad_norm": 82.59491410077452, "learning_rate": 3.5578716616397814e-07, "loss": 0.1769, "step": 10195, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.908695652173913, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8142857142857143, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8826206193448072, "success_rate.epoch.global": 0.8953922789539228, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9934701492537313, "tokens_p.mean_in_band": 0.634375, "tokens_rate.above_band": 0.9305555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06944444444444445 }, { "epoch": 2.1729867916489134, "grad_norm": 70.72759597959033, "learning_rate": 3.5575320987675463e-07, "loss": 0.1978, "step": 10200, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9094827586206896, "success_rate.epoch.env.math": 0.9757709251101322, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.815748031496063, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8828397641329613, "success_rate.epoch.global": 0.8960396039603961, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970858134920635, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.174051981252663, "grad_norm": 57.463233844692645, "learning_rate": 3.5571925668456124e-07, "loss": 0.1043, "step": 10205, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9102564102564102, "success_rate.epoch.env.math": 0.975929978118162, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8171875, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8830554162372966, "success_rate.epoch.global": 0.8966789667896679, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9903846153846154, "tokens_p.mean_in_band": 0.8020833333333334, "tokens_rate.above_band": 0.975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025 }, { "epoch": 2.1751171708564123, "grad_norm": 113.58682220156203, "learning_rate": 3.556853066121428e-07, "loss": 0.2097, "step": 10210, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9166666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9629629629629629, "success_rate.epoch.env.logic": 0.9071729957805907, "success_rate.epoch.env.math": 0.9761388286334056, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8177570093457944, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8828618147810883, "success_rate.epoch.global": 0.8966992665036675, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951923076923077, "tokens_p.mean_in_band": 0.7373046875, "tokens_rate.above_band": 0.9381443298969072, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061855670103092786 }, { "epoch": 2.176182360460162, "grad_norm": 145.70704886424622, "learning_rate": 3.5565135968424194e-07, "loss": 0.3638, "step": 10215, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9772727272727273, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9634146341463414, "success_rate.epoch.env.logic": 0.9083333333333333, "success_rate.epoch.env.math": 0.9762419006479481, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8186046511627907, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8832493968574309, "success_rate.epoch.global": 0.8973268529769137, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0018138801261829, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9993694829760403, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0006305170239596469 }, { "epoch": 2.1772475500639112, "grad_norm": 91.29467156649099, "learning_rate": 3.556174159255989e-07, "loss": 0.124, "step": 10220, "success_rate.epoch.env.abd": 0.9871794871794872, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9634146341463414, "success_rate.epoch.env.logic": 0.9087136929460581, "success_rate.epoch.env.math": 0.9764957264957265, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8188854489164087, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8834091619152038, "success_rate.epoch.global": 0.8979468599033816, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988132911392406, "tokens_p.mean_in_band": 0.783203125, "tokens_rate.above_band": 0.9875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0125 }, { "epoch": 2.178312739667661, "grad_norm": 124.59608843676052, "learning_rate": 3.5558347536095157e-07, "loss": 0.1955, "step": 10225, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.963855421686747, "success_rate.epoch.env.logic": 0.9094650205761317, "success_rate.epoch.env.math": 0.9765957446808511, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.82, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8836427045889862, "success_rate.epoch.global": 0.8985594237695078, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974933155080213, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.9929203539823008, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007079646017699115 }, { "epoch": 2.17937792927141, "grad_norm": 91.12728799105118, "learning_rate": 3.555495380150357e-07, "loss": 0.2989, "step": 10230, "success_rate.epoch.env.abd": 0.9875, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9098360655737705, "success_rate.epoch.env.math": 0.9767441860465116, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8211009174311926, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8827612648014224, "success_rate.epoch.global": 0.8985680190930787, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9869237588652482, "tokens_p.mean_in_band": 0.6725, "tokens_rate.above_band": 0.818577648766328, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18142235123367198 }, { "epoch": 2.18044311887516, "grad_norm": 158.24440444924429, "learning_rate": 3.555156039125846e-07, "loss": 0.2805, "step": 10235, "success_rate.epoch.env.abd": 0.9876543209876543, "success_rate.epoch.env.agentgym:alfworld": 0.9183673469387755, "success_rate.epoch.env.agentgym:sciworld": 0.9777777777777777, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9102040816326531, "success_rate.epoch.env.math": 0.9767932489451476, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8212121212121212, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8828233196856133, "success_rate.epoch.global": 0.8985765124555161, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999375, "tokens_p.mean_in_band": 0.471875, "tokens_rate.above_band": 0.9917355371900827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008264462809917356 }, { "epoch": 2.181508308478909, "grad_norm": 29.247346444365288, "learning_rate": 3.5548167307832904e-07, "loss": 0.4119, "step": 10240, "success_rate.epoch.env.abd": 0.9876543209876543, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9787234042553191, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9112903225806451, "success_rate.epoch.env.math": 0.9768421052631578, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8220211161387632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8814162625702872, "success_rate.epoch.global": 0.8985849056603774, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980796089385475, "tokens_p.mean_in_band": 0.57421875, "tokens_rate.above_band": 0.9944444444444445, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005555555555555556 }, { "epoch": 2.1825734980826588, "grad_norm": 41.530817869612875, "learning_rate": 3.554477455369977e-07, "loss": 0.2657, "step": 10245, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9787234042553191, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9116465863453815, "success_rate.epoch.env.math": 0.9770354906054279, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8228228228228228, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8815528001023046, "success_rate.epoch.global": 0.8991793669402111, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983552631578947, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.183638687686408, "grad_norm": 255.64992913836087, "learning_rate": 3.5541382131331677e-07, "loss": 0.3722, "step": 10250, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9791666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9123505976095617, "success_rate.epoch.env.math": 0.9751552795031055, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8236173393124065, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.881558398198922, "success_rate.epoch.global": 0.8991841491841492, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974112426035503, "tokens_p.mean_below_band": 9.255018085241318e-09, "tokens_p.mean_in_band": 0.753515625, "tokens_rate.above_band": 0.9657142857142857, "tokens_rate.below_band": 0.005714285714285714, "tokens_rate.in_band": 0.02857142857142857 }, { "epoch": 2.1847038772901577, "grad_norm": 183.63775688535384, "learning_rate": 3.553799004320098e-07, "loss": 0.2492, "step": 10255, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9087301587301587, "success_rate.epoch.env.math": 0.9752066115702479, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8249258160237388, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8814048953941249, "success_rate.epoch.global": 0.899188876013905, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988540870893812, "tokens_p.mean_in_band": 0.446875, "tokens_rate.above_band": 0.9849510910458992, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015048908954100828 }, { "epoch": 2.185769066893907, "grad_norm": 103.67100506622471, "learning_rate": 3.553459829177982e-07, "loss": 0.1638, "step": 10260, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9754601226993865, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8251851851851851, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.880419489716206, "success_rate.epoch.global": 0.8991935483870968, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936583129584352, "tokens_p.mean_in_band": 0.69609375, "tokens_rate.above_band": 0.964622641509434, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03537735849056604 }, { "epoch": 2.1868342564976566, "grad_norm": 143.77941569645327, "learning_rate": 3.553120687954009e-07, "loss": 0.4796, "step": 10265, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.905511811023622, "success_rate.epoch.env.math": 0.9755102040816327, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.825, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8803086896122264, "success_rate.epoch.global": 0.898567335243553, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.996179706601467, "tokens_p.mean_in_band": 0.6774553571428571, "tokens_rate.above_band": 0.9831730769230769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016826923076923076 }, { "epoch": 2.187899446101406, "grad_norm": 169.1682516071941, "learning_rate": 3.5527815808953417e-07, "loss": 0.3989, "step": 10270, "success_rate.epoch.env.abd": 0.9879518072289156, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9058823529411765, "success_rate.epoch.env.math": 0.973630831643002, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8233576642335766, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8800697168624271, "success_rate.epoch.global": 0.8974358974358975, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9935213414634146, "tokens_p.mean_in_band": 0.64, "tokens_rate.above_band": 0.8677248677248677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13227513227513227 }, { "epoch": 2.1889646357051555, "grad_norm": 64.89640814461663, "learning_rate": 3.5524425082491184e-07, "loss": 0.2961, "step": 10275, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9058823529411765, "success_rate.epoch.env.math": 0.9738430583501007, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8231884057971014, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8800866622385132, "success_rate.epoch.global": 0.8974504249291785, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9880681818181818, "tokens_p.mean_in_band": 0.5217633928571429, "tokens_rate.above_band": 0.9401709401709402, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05982905982905983 }, { "epoch": 2.1900298253089048, "grad_norm": 352.2383096109671, "learning_rate": 3.552103470262453e-07, "loss": 0.249, "step": 10280, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.90625, "success_rate.epoch.env.math": 0.974, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8247126436781609, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8802729192012205, "success_rate.epoch.global": 0.8980281690140846, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9876179245283019, "tokens_p.mean_in_band": 0.751953125, "tokens_rate.above_band": 0.9464285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05357142857142857 }, { "epoch": 2.1910950149126545, "grad_norm": 151.01735787478043, "learning_rate": 3.5517644671824345e-07, "loss": 0.2639, "step": 10285, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9066147859922179, "success_rate.epoch.env.math": 0.9742574257425742, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8257142857142857, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.880420542271304, "success_rate.epoch.global": 0.8985994397759104, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9916237113402062, "tokens_p.mean_in_band": 0.8072916666666666, "tokens_rate.above_band": 0.97, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03 }, { "epoch": 2.192160204516404, "grad_norm": 125.98396183994296, "learning_rate": 3.551425499256126e-07, "loss": 0.198, "step": 10290, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9076923076923077, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.826950354609929, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.880640100381498, "success_rate.epoch.global": 0.8991643454038997, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991964285714285, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.1932253941201534, "grad_norm": 98.91486294312406, "learning_rate": 3.551086566730564e-07, "loss": 0.362, "step": 10295, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.9795918367346939, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9076923076923077, "success_rate.epoch.env.math": 0.9744597249508841, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8260869565217391, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.880570768790927, "success_rate.epoch.global": 0.8986149584487535, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9900990099009901, "tokens_p.mean_in_band": 0.511328125, "tokens_rate.above_band": 0.9099099099099099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09009009009009009 }, { "epoch": 2.1942905837239026, "grad_norm": 112.55139212232042, "learning_rate": 3.5507476698527613e-07, "loss": 0.2197, "step": 10300, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9076923076923077, "success_rate.epoch.env.math": 0.9745098039215686, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.827538247566064, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8808361900900558, "success_rate.epoch.global": 0.8991735537190083, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981352880658436, "tokens_p.mean_in_band": 0.6744791666666666, "tokens_rate.above_band": 0.9969230769230769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003076923076923077 }, { "epoch": 2.1953557733276523, "grad_norm": 40.75117826753435, "learning_rate": 3.550408808869703e-07, "loss": 0.2322, "step": 10305, "success_rate.epoch.env.abd": 0.9880952380952381, "success_rate.epoch.env.agentgym:alfworld": 0.9019607843137255, "success_rate.epoch.env.agentgym:sciworld": 0.98, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9080459770114943, "success_rate.epoch.env.math": 0.974757281553398, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8284923928077456, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8809775802893918, "success_rate.epoch.global": 0.8997260273972603, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960526315789474, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.196420962931402, "grad_norm": 121.9444776883651, "learning_rate": 3.550069984028348e-07, "loss": 0.3273, "step": 10310, "success_rate.epoch.env.abd": 0.9882352941176471, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9045801526717557, "success_rate.epoch.env.math": 0.9748062015503876, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8294360385144429, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8810169188593856, "success_rate.epoch.global": 0.8997275204359673, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989792713567839, "tokens_p.mean_in_band": 0.49594907407407407, "tokens_rate.above_band": 0.9778869778869779, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022113022113022112 }, { "epoch": 2.1974861525351512, "grad_norm": 47.834997807906134, "learning_rate": 3.5497311955756303e-07, "loss": 0.197, "step": 10315, "success_rate.epoch.env.abd": 0.9882352941176471, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9049429657794676, "success_rate.epoch.env.math": 0.9749518304431599, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8308321964529332, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8811900643083832, "success_rate.epoch.global": 0.9002710027100271, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951704545454545, "tokens_p.mean_in_band": 0.80859375, "tokens_rate.above_band": 0.9821428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017857142857142856 }, { "epoch": 2.198551342138901, "grad_norm": 439.5282198262885, "learning_rate": 3.5493924437584555e-07, "loss": 0.263, "step": 10320, "success_rate.epoch.env.abd": 0.9883720930232558, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9803921568627451, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.9053030303030303, "success_rate.epoch.env.math": 0.975095785440613, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8310626702997275, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.879378690161132, "success_rate.epoch.global": 0.8997304582210243, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9915659617321249, "tokens_p.mean_in_band": 0.7450635302197802, "tokens_rate.above_band": 0.9160516605166051, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08394833948339483 }, { "epoch": 2.19961653174265, "grad_norm": 761.9837880416337, "learning_rate": 3.549053728823704e-07, "loss": 0.2322, "step": 10325, "success_rate.epoch.env.abd": 0.9883720930232558, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.9056603773584906, "success_rate.epoch.env.math": 0.9752851711026616, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8319783197831978, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8795459134436289, "success_rate.epoch.global": 0.9002680965147453, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9952083333333334, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9933774834437086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006622516556291391 }, { "epoch": 2.2006817213464, "grad_norm": 90.05011135362957, "learning_rate": 3.548715051018229e-07, "loss": 0.2618, "step": 10330, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9807692307692307, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.9060150375939849, "success_rate.epoch.env.math": 0.9753787878787878, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8331090174966352, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8797134812016438, "success_rate.epoch.global": 0.9008, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956395348837209, "tokens_p.mean_in_band": 0.6315104166666666, "tokens_rate.above_band": 0.9772727272727273, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022727272727272728 }, { "epoch": 2.201746910950149, "grad_norm": 116.87056608849511, "learning_rate": 3.548376410588856e-07, "loss": 0.3251, "step": 10335, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9811320754716981, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.946236559139785, "success_rate.epoch.env.logic": 0.9063670411985019, "success_rate.epoch.env.math": 0.975517890772128, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8326639892904953, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8797506560192971, "success_rate.epoch.global": 0.9007957559681697, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982541899441341, "tokens_p.mean_in_band": 0.7131696428571429, "tokens_rate.above_band": 0.9808219178082191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019178082191780823 }, { "epoch": 2.2028121005538988, "grad_norm": 102.9445188138565, "learning_rate": 3.548037807782385e-07, "loss": 0.1565, "step": 10340, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9468085106382979, "success_rate.epoch.env.logic": 0.9067164179104478, "success_rate.epoch.env.math": 0.9757462686567164, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8331108144192256, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8799275594041663, "success_rate.epoch.global": 0.9013192612137203, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980837264150944, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9953051643192489, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004694835680751174 }, { "epoch": 2.203877290157648, "grad_norm": 203.637264163332, "learning_rate": 3.547699242845585e-07, "loss": 0.3841, "step": 10345, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9468085106382979, "success_rate.epoch.env.logic": 0.9067164179104478, "success_rate.epoch.env.math": 0.9759704251386322, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8342175066312998, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8800485456490744, "success_rate.epoch.global": 0.9018372703412073, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969907407407408, "tokens_p.mean_in_band": 0.873046875, "tokens_rate.above_band": 0.9712230215827338, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02877697841726619 }, { "epoch": 2.2049424797613977, "grad_norm": 121.72233733345384, "learning_rate": 3.547360716025202e-07, "loss": 0.4605, "step": 10350, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9814814814814815, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9080882352941176, "success_rate.epoch.env.math": 0.9760589318600368, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8309114927344782, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8799316566147641, "success_rate.epoch.global": 0.9007832898172323, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9978070175438597, "tokens_p.mean_in_band": 0.5127650669642857, "tokens_rate.above_band": 0.9606741573033708, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03932584269662921 }, { "epoch": 2.206007669365147, "grad_norm": 475.99020721070815, "learning_rate": 3.54702222756795e-07, "loss": 0.307, "step": 10355, "success_rate.epoch.env.abd": 0.9886363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9479166666666666, "success_rate.epoch.env.logic": 0.9080882352941176, "success_rate.epoch.env.math": 0.9761904761904762, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8318002628120894, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8801048621019265, "success_rate.epoch.global": 0.9012987012987013, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983407079646017, "tokens_p.mean_in_band": 0.8466796875, "tokens_rate.above_band": 0.9964726631393298, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003527336860670194 }, { "epoch": 2.2070728589688966, "grad_norm": 65.40048558684627, "learning_rate": 3.546683777720518e-07, "loss": 0.2953, "step": 10360, "success_rate.epoch.env.abd": 0.9887640449438202, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9479166666666666, "success_rate.epoch.env.logic": 0.9087591240875912, "success_rate.epoch.env.math": 0.97632058287796, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8326797385620915, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8802692396054188, "success_rate.epoch.global": 0.9018087855297158, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995049504950495, "tokens_p.mean_in_band": 0.78515625, "tokens_rate.above_band": 0.9805825242718447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019417475728155338 }, { "epoch": 2.208138048572646, "grad_norm": 348.89387099802366, "learning_rate": 3.546345366729566e-07, "loss": 0.5394, "step": 10365, "success_rate.epoch.env.abd": 0.9887640449438202, "success_rate.epoch.env.agentgym:alfworld": 0.9038461538461539, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9484536082474226, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9764065335753176, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8326848249027238, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8803564908437884, "success_rate.epoch.global": 0.9017994858611825, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956597222222222, "tokens_p.mean_in_band": 0.7035590277777778, "tokens_rate.above_band": 0.9411764705882353, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058823529411764705 }, { "epoch": 2.2092032381763955, "grad_norm": 59.11365061195948, "learning_rate": 3.546006994841725e-07, "loss": 0.1907, "step": 10370, "success_rate.epoch.env.abd": 0.9888888888888889, "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.9094202898550725, "success_rate.epoch.env.math": 0.9765342960288809, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8320413436692506, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8805636465738764, "success_rate.epoch.global": 0.9017902813299232, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988154332129964, "tokens_p.mean_below_band": 1.525040715932846e-08, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9964028776978417, "tokens_rate.below_band": 0.0017985611510791368, "tokens_rate.in_band": 0.0017985611510791368 }, { "epoch": 2.2102684277801448, "grad_norm": 80.78888263581783, "learning_rate": 3.5456686623035987e-07, "loss": 0.4095, "step": 10375, "success_rate.epoch.env.abd": 0.9888888888888889, "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9494949494949495, "success_rate.epoch.env.logic": 0.9097472924187726, "success_rate.epoch.env.math": 0.9765765765765766, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8322663252240717, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8806645213306431, "success_rate.epoch.global": 0.9017811704834605, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942239336492891, "tokens_p.mean_in_band": 0.63232421875, "tokens_rate.above_band": 0.9634703196347032, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0365296803652968 }, { "epoch": 2.2113336173838944, "grad_norm": 2.2558260457402066, "learning_rate": 3.54533036936176e-07, "loss": 0.2063, "step": 10380, "success_rate.epoch.env.abd": 0.9888888888888889, "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.9818181818181818, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.910394265232975, "success_rate.epoch.env.math": 0.9767025089605734, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8326947637292464, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8800175090394302, "success_rate.epoch.global": 0.9017721518987342, "success_rate.window.env.ded": 0.6666666666666666, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933997844827587, "tokens_p.mean_in_band": 0.58, "tokens_rate.above_band": 0.8743718592964824, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12562814070351758 }, { "epoch": 2.2123988069876437, "grad_norm": 0.0, "learning_rate": 3.5449921162627557e-07, "loss": 0.1028, "step": 10385, "success_rate.epoch.env.abd": 0.9891304347826086, "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9411764705882353, "success_rate.epoch.env.logic": 0.9113475177304965, "success_rate.epoch.env.math": 0.9767857142857143, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8331210191082803, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8802019576230748, "success_rate.epoch.global": 0.9022670025188917, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984032846715328, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9989583333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0010416666666666667 }, { "epoch": 2.2134639965913934, "grad_norm": 53.84760187355933, "learning_rate": 3.544653903253102e-07, "loss": 0.213, "step": 10390, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.941747572815534, "success_rate.epoch.env.logic": 0.9113475177304965, "success_rate.epoch.env.math": 0.9768683274021353, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8331226295828066, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8802721578834017, "success_rate.epoch.global": 0.9022556390977443, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976415094339622, "tokens_p.mean_in_band": 0.3755580357142857, "tokens_rate.above_band": 0.9098712446351931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09012875536480687 }, { "epoch": 2.2145291861951426, "grad_norm": 146.0855247999162, "learning_rate": 3.5443157305792855e-07, "loss": 0.5253, "step": 10395, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.941747572815534, "success_rate.epoch.env.logic": 0.9122807017543859, "success_rate.epoch.env.math": 0.9769911504424779, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8337531486146096, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8805842993511246, "success_rate.epoch.global": 0.9027431421446384, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967592592592592, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9975369458128078, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0024630541871921183 }, { "epoch": 2.2155943757988923, "grad_norm": 113.27216231718302, "learning_rate": 3.543977598487764e-07, "loss": 0.1942, "step": 10400, "success_rate.epoch.env.abd": 0.989247311827957, "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.9821428571428571, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9423076923076923, "success_rate.epoch.env.logic": 0.9122807017543859, "success_rate.epoch.env.math": 0.9771929824561404, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8345864661654135, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8807293238108177, "success_rate.epoch.global": 0.9032258064516129, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9924202127659575, "tokens_p.mean_in_band": 0.7137276785714286, "tokens_rate.above_band": 0.9710743801652892, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028925619834710745 }, { "epoch": 2.2166595654026415, "grad_norm": 68.29868150681403, "learning_rate": 3.543639507224967e-07, "loss": 0.2539, "step": 10405, "success_rate.epoch.env.abd": 0.9893617021276596, "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9125874125874126, "success_rate.epoch.env.math": 0.9773123909249564, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.83375, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8808298566064136, "success_rate.epoch.global": 0.9032098765432098, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980458311063602, "tokens_p.mean_in_band": 0.66015625, "tokens_rate.above_band": 0.9978666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0021333333333333334 }, { "epoch": 2.217724755006391, "grad_norm": 69.45311921574765, "learning_rate": 3.5433014570372913e-07, "loss": 0.208, "step": 10410, "success_rate.epoch.env.abd": 0.9893617021276596, "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9131944444444444, "success_rate.epoch.env.math": 0.9773913043478261, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8337468982630273, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8808919332921344, "success_rate.epoch.global": 0.9031941031941032, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966397849462365, "tokens_p.mean_in_band": 0.76875, "tokens_rate.above_band": 0.9823943661971831, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017605633802816902 }, { "epoch": 2.2187899446101405, "grad_norm": 163.18710301265094, "learning_rate": 3.5429634481711073e-07, "loss": 0.2559, "step": 10415, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9134948096885813, "success_rate.epoch.env.math": 0.9775474956822107, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8320987654320988, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8807937883676309, "success_rate.epoch.global": 0.9026894865525672, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9931469298245614, "tokens_p.mean_in_band": 0.5030048076923077, "tokens_rate.above_band": 0.8976377952755905, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10236220472440945 }, { "epoch": 2.21985513421389, "grad_norm": 102.65183870658844, "learning_rate": 3.5426254808727513e-07, "loss": 0.1708, "step": 10420, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.911864406779661, "success_rate.epoch.env.math": 0.9775862068965517, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8314883148831488, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8805935936182191, "success_rate.epoch.global": 0.9021897810218978, "success_rate.window.env.logic": 0.8333333333333334, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975436681222707, "tokens_p.mean_in_band": 0.6029094827586207, "tokens_rate.above_band": 0.9594972067039106, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040502793296089384 }, { "epoch": 2.2209203238176394, "grad_norm": 29.948138320034516, "learning_rate": 3.542287555388533e-07, "loss": 0.2419, "step": 10425, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.912751677852349, "success_rate.epoch.env.math": 0.9777777777777777, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8316953316953317, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8808635354772734, "success_rate.epoch.global": 0.9026634382566586, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986595174262735, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.9946666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005333333333333333 }, { "epoch": 2.221985513421389, "grad_norm": 43.04740596362637, "learning_rate": 3.54194967196473e-07, "loss": 0.2223, "step": 10430, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9824561403508771, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.912751677852349, "success_rate.epoch.env.math": 0.9779286926994907, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8329268292682926, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8809892093404256, "success_rate.epoch.global": 0.9031325301204819, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923295454545454, "tokens_p.mean_in_band": 0.7208806818181818, "tokens_rate.above_band": 0.9090909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09090909090909091 }, { "epoch": 2.2230507030251383, "grad_norm": 175.9974499026715, "learning_rate": 3.541611830847588e-07, "loss": 0.182, "step": 10435, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9130434782608695, "success_rate.epoch.env.math": 0.9779661016949153, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8343409915356711, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8811751958867986, "success_rate.epoch.global": 0.9035971223021583, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9954545454545455, "tokens_p.mean_in_band": 0.703125, "tokens_rate.above_band": 0.9763313609467456, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023668639053254437 }, { "epoch": 2.224115892628888, "grad_norm": 175.09294908698772, "learning_rate": 3.5412740322833246e-07, "loss": 0.2044, "step": 10440, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9141914191419142, "success_rate.epoch.env.math": 0.9780775716694773, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8349397590361446, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8813441211918968, "success_rate.epoch.global": 0.9040572792362769, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982798165137615, "tokens_p.mean_in_band": 0.80859375, "tokens_rate.above_band": 0.9732142857142857, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026785714285714284 }, { "epoch": 2.225181082232637, "grad_norm": 139.77944157282403, "learning_rate": 3.540936276518125e-07, "loss": 0.1099, "step": 10445, "success_rate.epoch.env.abd": 0.9894736842105263, "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9144736842105263, "success_rate.epoch.env.math": 0.9781879194630873, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8349282296650717, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8813787651456376, "success_rate.epoch.global": 0.9040380047505938, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916460396039604, "tokens_p.mean_in_band": 0.7787642045454546, "tokens_rate.above_band": 0.9017857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09821428571428571 }, { "epoch": 2.226246271836387, "grad_norm": 95.48067858615772, "learning_rate": 3.5405985637981417e-07, "loss": 0.4742, "step": 10450, "success_rate.epoch.env.abd": 0.9896907216494846, "success_rate.epoch.env.agentgym:alfworld": 0.9090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9827586206896551, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9433962264150944, "success_rate.epoch.env.logic": 0.9147540983606557, "success_rate.epoch.env.math": 0.9782608695652174, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.835909631391201, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8815198381835782, "success_rate.epoch.global": 0.9044917257683215, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964028776978417, "tokens_p.mean_in_band": 0.7825520833333334, "tokens_rate.above_band": 0.9788732394366197, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02112676056338028 }, { "epoch": 2.227311461440136, "grad_norm": 235.28559288404165, "learning_rate": 3.540260894369499e-07, "loss": 0.3454, "step": 10455, "success_rate.epoch.env.abd": 0.9896907216494846, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.9830508474576272, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9147540983606557, "success_rate.epoch.env.math": 0.9782971619365609, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8358913813459268, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8801203391008546, "success_rate.epoch.global": 0.904, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974760892667375, "tokens_p.mean_in_band": 0.400390625, "tokens_rate.above_band": 0.9957671957671957, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004232804232804233 }, { "epoch": 2.228376651043886, "grad_norm": 160.6381780763142, "learning_rate": 3.5399232684782866e-07, "loss": 0.23, "step": 10460, "success_rate.epoch.env.abd": 0.9897959183673469, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.9838709677419355, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9153094462540716, "success_rate.epoch.env.math": 0.9783333333333334, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8364705882352941, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8803108884810116, "success_rate.epoch.global": 0.9044496487119438, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973723723723724, "tokens_p.mean_in_band": 0.8645833333333334, "tokens_rate.above_band": 0.9910714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008928571428571428 }, { "epoch": 2.2294418406476355, "grad_norm": 59.72525531913584, "learning_rate": 3.5395856863705647e-07, "loss": 0.1808, "step": 10465, "success_rate.epoch.env.abd": 0.9897959183673469, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.9838709677419355, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9158576051779935, "success_rate.epoch.env.math": 0.978405315614618, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8364485981308412, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8803652658538074, "success_rate.epoch.global": 0.9044289044289044, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9911764705882353, "tokens_p.mean_below_band": 5.617039278149605e-09, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.9826589595375722, "tokens_rate.below_band": 0.005780346820809248, "tokens_rate.in_band": 0.011560693641618497 }, { "epoch": 2.2305070302513847, "grad_norm": 82.93498714340033, "learning_rate": 3.5392481482923607e-07, "loss": 0.1552, "step": 10470, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.9838709677419355, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9439252336448598, "success_rate.epoch.env.logic": 0.9161290322580645, "success_rate.epoch.env.math": 0.9785123966942149, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8372093023255814, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8804782007525392, "success_rate.epoch.global": 0.9048723897911833, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990717821782178, "tokens_p.mean_in_band": 0.673828125, "tokens_rate.above_band": 0.9921414538310412, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007858546168958742 }, { "epoch": 2.231572219855134, "grad_norm": 26.753714481629693, "learning_rate": 3.538910654489669e-07, "loss": 0.1306, "step": 10475, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9785123966942149, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.838150289017341, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.880683096051175, "success_rate.epoch.global": 0.9053117782909931, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982352941176471, "tokens_p.mean_in_band": 0.8177083333333334, "tokens_rate.above_band": 0.9929906542056075, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007009345794392523 }, { "epoch": 2.2326374094588837, "grad_norm": 2825.53851744974, "learning_rate": 3.5385732052084536e-07, "loss": 0.5081, "step": 10480, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.9841269841269841, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9454545454545454, "success_rate.epoch.env.logic": 0.9171974522292994, "success_rate.epoch.env.math": 0.9785478547854786, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8390804597701149, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8809109610890634, "success_rate.epoch.global": 0.9057471264367816, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994201030927835, "tokens_p.mean_in_band": 0.845703125, "tokens_rate.above_band": 0.9918200408997955, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0081799591002045 }, { "epoch": 2.2337025990626334, "grad_norm": 49.641437949288076, "learning_rate": 3.538235800694645e-07, "loss": 0.1971, "step": 10485, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.984375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9174603174603174, "success_rate.epoch.env.math": 0.9770491803278688, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8396334478808706, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8809161061116616, "success_rate.epoch.global": 0.905720823798627, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993629476584022, "tokens_p.mean_in_band": 0.56988525390625, "tokens_rate.above_band": 0.989100817438692, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010899182561307902 }, { "epoch": 2.2347677886663826, "grad_norm": 51.797906689952775, "learning_rate": 3.537898441194141e-07, "loss": 0.2071, "step": 10490, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.8928571428571429, "success_rate.epoch.env.agentgym:sciworld": 0.984375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9182389937106918, "success_rate.epoch.env.math": 0.9771986970684039, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8401826484018264, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8810504146127404, "success_rate.epoch.global": 0.9061503416856492, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9901685393258427, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.2358329782701323, "grad_norm": 122.66354994853877, "learning_rate": 3.5375611269528063e-07, "loss": 0.3037, "step": 10495, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.8771929824561403, "success_rate.epoch.env.agentgym:sciworld": 0.984375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.91875, "success_rate.epoch.env.math": 0.9773095623987034, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8397727272727272, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8796456682572408, "success_rate.epoch.global": 0.9056689342403628, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99755859375, "tokens_p.mean_in_band": 0.6898871527777778, "tokens_rate.above_band": 0.9726443768996961, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02735562310030395 }, { "epoch": 2.2368981678738815, "grad_norm": 147.29677746440314, "learning_rate": 3.5372238582164736e-07, "loss": 0.6009, "step": 10500, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.8771929824561403, "success_rate.epoch.env.agentgym:sciworld": 0.984375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9380530973451328, "success_rate.epoch.env.logic": 0.9195046439628483, "success_rate.epoch.env.math": 0.9773828756058158, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8391845979614949, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8789499390988695, "success_rate.epoch.global": 0.9051918735891648, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9930827460193161, "tokens_p.mean_in_band": 0.5300830696202532, "tokens_rate.above_band": 0.9065309985802177, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0934690014197823 }, { "epoch": 2.237963357477631, "grad_norm": 145.82931575421242, "learning_rate": 3.5368866352309426e-07, "loss": 0.2903, "step": 10505, "success_rate.epoch.env.abd": 0.98989898989899, "success_rate.epoch.env.agentgym:alfworld": 0.8793103448275862, "success_rate.epoch.env.agentgym:sciworld": 0.984375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9385964912280702, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9774557165861514, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8400900900900901, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8790227674686086, "success_rate.epoch.global": 0.9051685393258427, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99902950310559, "tokens_p.mean_in_band": 0.7232142857142857, "tokens_rate.above_band": 0.9857142857142858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014285714285714285 }, { "epoch": 2.2390285470813804, "grad_norm": 45.73652035213183, "learning_rate": 3.5365494582419777e-07, "loss": 0.1962, "step": 10510, "success_rate.epoch.env.abd": 0.9900990099009901, "success_rate.epoch.env.agentgym:alfworld": 0.8793103448275862, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.916923076923077, "success_rate.epoch.env.math": 0.9775280898876404, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8404494382022472, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8791739019996979, "success_rate.epoch.global": 0.905592841163311, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992647058823529, "tokens_p.mean_in_band": 0.7395833333333334, "tokens_rate.above_band": 0.9912536443148688, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008746355685131196 }, { "epoch": 2.24009373668513, "grad_norm": 152.56967823307238, "learning_rate": 3.536212327495312e-07, "loss": 0.2413, "step": 10515, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.8793103448275862, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9174311926605505, "success_rate.epoch.env.math": 0.9776, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8409854423292273, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8784270358804194, "success_rate.epoch.global": 0.9055679287305123, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9934640522875817, "tokens_p.mean_in_band": 0.6307444852941176, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 2.2411589262888794, "grad_norm": 57.488395828120865, "learning_rate": 3.5358752432366436e-07, "loss": 0.401, "step": 10520, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.8813559322033898, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9179331306990881, "success_rate.epoch.env.math": 0.9777424483306836, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.841340782122905, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8741584280203014, "success_rate.epoch.global": 0.9055432372505543, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967938813349815, "tokens_p.mean_in_band": 0.6931423611111112, "tokens_rate.above_band": 0.9677033492822966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03229665071770335 }, { "epoch": 2.242224115892629, "grad_norm": 101.06287248325886, "learning_rate": 3.5355382057116366e-07, "loss": 0.3001, "step": 10525, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.8813559322033898, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9179331306990881, "success_rate.epoch.env.math": 0.9779527559055118, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8398220244716351, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.874039478013352, "success_rate.epoch.global": 0.9050772626931567, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9938271604938271, "tokens_p.mean_in_band": 0.5947916666666667, "tokens_rate.above_band": 0.84375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15625 }, { "epoch": 2.2432893054963783, "grad_norm": 113.62139919590547, "learning_rate": 3.535201215165923e-07, "loss": 0.2201, "step": 10530, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.8833333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9391304347826087, "success_rate.epoch.env.logic": 0.9181818181818182, "success_rate.epoch.env.math": 0.978021978021978, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8396017699115044, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8734705437256863, "success_rate.epoch.global": 0.9046153846153846, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9962053571428572, "tokens_p.mean_in_band": 0.595108695652174, "tokens_rate.above_band": 0.9480812641083521, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05191873589164785 }, { "epoch": 2.244354495100128, "grad_norm": 36.16810147761408, "learning_rate": 3.5348642718450975e-07, "loss": 0.3298, "step": 10535, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.8852459016393442, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9396551724137931, "success_rate.epoch.env.logic": 0.918429003021148, "success_rate.epoch.env.math": 0.9781931464174455, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8399558498896247, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8737623381938783, "success_rate.epoch.global": 0.9050328227571116, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967320261437909, "tokens_p.mean_in_band": 0.837890625, "tokens_rate.above_band": 0.9956616052060737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004338394793926247 }, { "epoch": 2.245419684703877, "grad_norm": 400.61767649684, "learning_rate": 3.534527375994723e-07, "loss": 0.2668, "step": 10540, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.8870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9401709401709402, "success_rate.epoch.env.logic": 0.9159159159159159, "success_rate.epoch.env.math": 0.9782945736434109, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8393839383938394, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8737062535517238, "success_rate.epoch.global": 0.9045751633986928, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984098451327433, "tokens_p.mean_in_band": 0.5321180555555556, "tokens_rate.above_band": 0.9436325678496869, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05636743215031315 }, { "epoch": 2.246484874307627, "grad_norm": 344.50962458125224, "learning_rate": 3.5341905278603255e-07, "loss": 0.3096, "step": 10545, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9401709401709402, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9768518518518519, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8399122807017544, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8738542977301281, "success_rate.epoch.global": 0.9045553145336226, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973072562358276, "tokens_p.mean_in_band": 0.584375, "tokens_rate.above_band": 0.9887892376681614, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011210762331838564 }, { "epoch": 2.247550063911376, "grad_norm": 79.51576849188532, "learning_rate": 3.533853727687399e-07, "loss": 0.2618, "step": 10550, "success_rate.epoch.env.abd": 0.9902912621359223, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.940677966101695, "success_rate.epoch.env.logic": 0.9142011834319527, "success_rate.epoch.env.math": 0.9769585253456221, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8404371584699454, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8730652207102673, "success_rate.epoch.global": 0.9041036717062635, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9995993589743589, "tokens_p.mean_in_band": 0.5754743303571429, "tokens_rate.above_band": 0.9653465346534653, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034653465346534656 }, { "epoch": 2.248615253515126, "grad_norm": 269.0632414404321, "learning_rate": 3.5335169757214004e-07, "loss": 0.2153, "step": 10555, "success_rate.epoch.env.abd": 0.9903846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.940677966101695, "success_rate.epoch.env.logic": 0.9147058823529411, "success_rate.epoch.env.math": 0.9770992366412213, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.840958605664488, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8731797853157058, "success_rate.epoch.global": 0.9045161290322581, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949596774193549, "tokens_p.mean_in_band": 0.8095703125, "tokens_rate.above_band": 0.96875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03125 }, { "epoch": 2.249680443118875, "grad_norm": 247.01504608607294, "learning_rate": 3.533180272207752e-07, "loss": 0.2698, "step": 10560, "success_rate.epoch.env.abd": 0.9903846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9127906976744186, "success_rate.epoch.env.math": 0.9772382397572079, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8400435255712732, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8722165018144559, "success_rate.epoch.global": 0.9036402569593148, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.983567950889077, "tokens_p.mean_below_band": 3.2782554626464844e-07, "tokens_p.mean_in_band": 0.4833713503649635, "tokens_rate.above_band": 0.6322269807280514, "tokens_rate.below_band": 0.0010706638115631692, "tokens_rate.in_band": 0.36670235546038543 }, { "epoch": 2.2507456327226247, "grad_norm": 228.30694282313985, "learning_rate": 3.5328436173918415e-07, "loss": 0.302, "step": 10565, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9132947976878613, "success_rate.epoch.env.math": 0.9773755656108597, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8405639913232104, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8723304533336019, "success_rate.epoch.global": 0.9040511727078892, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979910714285715, "tokens_p.mean_in_band": 0.7712053571428571, "tokens_rate.above_band": 0.9523809523809523, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047619047619047616 }, { "epoch": 2.251810822326374, "grad_norm": 195.40166225314178, "learning_rate": 3.53250701151902e-07, "loss": 0.2517, "step": 10570, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9137931034482759, "success_rate.epoch.env.math": 0.9774096385542169, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8405172413793104, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8723746014026812, "success_rate.epoch.global": 0.9039932030586236, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9929288321167883, "tokens_p.mean_in_band": 0.71630859375, "tokens_rate.above_band": 0.9448275862068966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05517241379310345 }, { "epoch": 2.2528760119301237, "grad_norm": 279.78370992556984, "learning_rate": 3.5321704548346026e-07, "loss": 0.34, "step": 10575, "success_rate.epoch.env.abd": 0.9904761904761905, "success_rate.epoch.env.agentgym:alfworld": 0.890625, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9327731092436975, "success_rate.epoch.env.logic": 0.9140401146131805, "success_rate.epoch.env.math": 0.9775449101796407, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8401287553648069, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8725318657560392, "success_rate.epoch.global": 0.9039763113367174, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952590811965812, "tokens_p.mean_in_band": 0.4995888157894737, "tokens_rate.above_band": 0.9609856262833676, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039014373716632446 }, { "epoch": 2.253941201533873, "grad_norm": 175.15692383435746, "learning_rate": 3.531833947583871e-07, "loss": 0.1896, "step": 10580, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.890625, "success_rate.epoch.env.agentgym:sciworld": 0.9846153846153847, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9150141643059491, "success_rate.epoch.env.math": 0.9776119402985075, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8404710920770878, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8727167282899674, "success_rate.epoch.global": 0.9043807919123842, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9998421717171717, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.2550063911376226, "grad_norm": 50.35143934821158, "learning_rate": 3.5314974900120686e-07, "loss": 0.1891, "step": 10585, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.8923076923076924, "success_rate.epoch.env.agentgym:sciworld": 0.9848484848484849, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9152542372881356, "success_rate.epoch.env.math": 0.9776785714285714, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8413205537806177, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8729959972314739, "success_rate.epoch.global": 0.9047818791946308, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974941037735849, "tokens_p.mean_in_band": 0.796875, "tokens_rate.above_band": 0.9883449883449883, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011655011655011656 }, { "epoch": 2.256071580741372, "grad_norm": 152.1123489151362, "learning_rate": 3.531161082364403e-07, "loss": 0.2123, "step": 10590, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.8939393939393939, "success_rate.epoch.env.agentgym:sciworld": 0.9852941176470589, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9129213483146067, "success_rate.epoch.env.math": 0.9777777777777777, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8416578108395324, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8730124434919165, "success_rate.epoch.global": 0.9047619047619048, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981496710526315, "tokens_p.mean_in_band": 0.49107142857142855, "tokens_rate.above_band": 0.9848812095032398, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01511879049676026 }, { "epoch": 2.2571367703451215, "grad_norm": 115.21188902946518, "learning_rate": 3.5308247248860455e-07, "loss": 0.2604, "step": 10595, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.8955223880597015, "success_rate.epoch.env.agentgym:sciworld": 0.9852941176470589, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9131652661064426, "success_rate.epoch.env.math": 0.9779735682819384, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8419936373276776, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8732268552105027, "success_rate.epoch.global": 0.9051580698835274, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969536163522013, "tokens_p.mean_in_band": 0.712109375, "tokens_rate.above_band": 0.9845201238390093, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015479876160990712 }, { "epoch": 2.2582019599488707, "grad_norm": 354.0929266790693, "learning_rate": 3.530488417822132e-07, "loss": 0.3058, "step": 10600, "success_rate.epoch.env.abd": 0.9905660377358491, "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.9852941176470589, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9136490250696379, "success_rate.epoch.env.math": 0.9781021897810219, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8424947145877378, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8734677545915982, "success_rate.epoch.global": 0.9055509527754764, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985972568578554, "tokens_p.mean_in_band": 0.703125, "tokens_rate.above_band": 0.9950372208436724, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004962779156327543 }, { "epoch": 2.2592671495526204, "grad_norm": 155.10747268681467, "learning_rate": 3.53015216141776e-07, "loss": 0.3618, "step": 10605, "success_rate.epoch.env.abd": 0.9907407407407407, "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.9710144927536232, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9141274238227147, "success_rate.epoch.env.math": 0.9781976744186046, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8417721518987342, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8721719726655174, "success_rate.epoch.global": 0.9051155115511551, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997032122905028, "tokens_p.mean_in_band": 0.7093098958333334, "tokens_rate.above_band": 0.9675675675675676, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032432432432432434 }, { "epoch": 2.2603323391563697, "grad_norm": 2187.0544159385286, "learning_rate": 3.5298159559179904e-07, "loss": 0.2654, "step": 10610, "success_rate.epoch.env.abd": 0.9908256880733946, "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.9141274238227147, "success_rate.epoch.env.math": 0.978386167146974, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8421052631578947, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8722647572105295, "success_rate.epoch.global": 0.905505341002465, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960488505747126, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.2613975287601193, "grad_norm": 59.58398357067685, "learning_rate": 3.529479801567848e-07, "loss": 0.3142, "step": 10615, "success_rate.epoch.env.abd": 0.990990990990991, "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9333333333333333, "success_rate.epoch.env.logic": 0.914364640883978, "success_rate.epoch.env.math": 0.9784791965566715, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8405036726128017, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8721642080144807, "success_rate.epoch.global": 0.9050736497545008, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978448275862069, "tokens_p.mean_in_band": 0.44375, "tokens_rate.above_band": 0.9530516431924883, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046948356807511735 }, { "epoch": 2.2624627183638686, "grad_norm": 24.329539692316768, "learning_rate": 3.52914369861232e-07, "loss": 0.2094, "step": 10620, "success_rate.epoch.env.abd": 0.9910714285714286, "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9344262295081968, "success_rate.epoch.env.logic": 0.9146005509641874, "success_rate.epoch.env.math": 0.9785100286532952, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8411703239289446, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8723923118085897, "success_rate.epoch.global": 0.9054604726976365, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982416879795396, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.2635279079676183, "grad_norm": 55.45856846103631, "learning_rate": 3.5288076472963544e-07, "loss": 0.193, "step": 10625, "success_rate.epoch.env.abd": 0.9910714285714286, "success_rate.epoch.env.agentgym:alfworld": 0.8970588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.936, "success_rate.epoch.env.logic": 0.9148351648351648, "success_rate.epoch.env.math": 0.9785714285714285, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.841831425598335, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8726223923495265, "success_rate.epoch.global": 0.9058441558441559, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972713097713097, "tokens_p.mean_in_band": 0.7317708333333334, "tokens_rate.above_band": 0.993801652892562, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006198347107438017 }, { "epoch": 2.2645930975713675, "grad_norm": 230.45932447351632, "learning_rate": 3.528471647864864e-07, "loss": 0.2204, "step": 10630, "success_rate.epoch.env.abd": 0.9912280701754386, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9365079365079365, "success_rate.epoch.env.logic": 0.915068493150685, "success_rate.epoch.env.math": 0.9786628733997155, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8411214953271028, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8727834216306892, "success_rate.epoch.global": 0.9058205335489087, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972524650780608, "tokens_p.mean_in_band": 0.7430555555555556, "tokens_rate.above_band": 0.9926590538336052, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00734094616639478 }, { "epoch": 2.265658287175117, "grad_norm": 62.55307292288869, "learning_rate": 3.528135700562723e-07, "loss": 0.2409, "step": 10635, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.971830985915493, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9365079365079365, "success_rate.epoch.env.logic": 0.9155313351498637, "success_rate.epoch.env.math": 0.9787835926449788, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8416149068322981, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8728882625761698, "success_rate.epoch.global": 0.9061996779388084, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9912014563106796, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9903846153846154, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009615384615384616 }, { "epoch": 2.266723476778867, "grad_norm": 124.68884782592995, "learning_rate": 3.5277998056347664e-07, "loss": 0.217, "step": 10640, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9159891598915989, "success_rate.epoch.env.math": 0.9788434414668548, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8412371134020619, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.872981994753253, "success_rate.epoch.global": 0.9061748195669607, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943794964028777, "tokens_p.mean_in_band": 0.62841796875, "tokens_rate.above_band": 0.972027972027972, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027972027972027972 }, { "epoch": 2.267788666382616, "grad_norm": 198.79627418355625, "learning_rate": 3.527463963325793e-07, "loss": 0.2911, "step": 10645, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9139784946236559, "success_rate.epoch.env.math": 0.9789029535864979, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8406988694758479, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8728239359965472, "success_rate.epoch.global": 0.9057507987220448, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994637573964497, "tokens_p.mean_in_band": 0.552734375, "tokens_rate.above_band": 0.9548022598870056, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04519774011299435 }, { "epoch": 2.2688538559863654, "grad_norm": 208.3595787489902, "learning_rate": 3.527128173880563e-07, "loss": 0.1807, "step": 10650, "success_rate.epoch.env.abd": 0.991304347826087, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.972972972972973, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9146666666666666, "success_rate.epoch.env.math": 0.9790209790209791, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8411885245901639, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8729417407776207, "success_rate.epoch.global": 0.9061256961018298, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957561728395061, "tokens_p.mean_in_band": 0.765625, "tokens_rate.above_band": 0.9418604651162791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05813953488372093 }, { "epoch": 2.269919045590115, "grad_norm": 180.83061547128702, "learning_rate": 3.526792437543794e-07, "loss": 0.2654, "step": 10655, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.9733333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9148936170212766, "success_rate.epoch.env.math": 0.979050279329609, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8411405295315683, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8730002479125066, "success_rate.epoch.global": 0.9061014263074485, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9666666666666668, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970868644067796, "tokens_p.mean_in_band": 0.7020596590909091, "tokens_rate.above_band": 0.9554655870445344, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044534412955465584 }, { "epoch": 2.2709842351938647, "grad_norm": 73.78093962309599, "learning_rate": 3.526456754560172e-07, "loss": 0.2106, "step": 10660, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.9736842105263158, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937007874015748, "success_rate.epoch.env.logic": 0.9126984126984127, "success_rate.epoch.env.math": 0.979050279329609, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8412537917087968, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8728428783713564, "success_rate.epoch.global": 0.9056827150749802, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957107843137255, "tokens_p.mean_in_band": 0.5017361111111112, "tokens_rate.above_band": 0.9659090909090909, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03409090909090909 }, { "epoch": 2.272049424797614, "grad_norm": 297.9625919760371, "learning_rate": 3.526121125174338e-07, "loss": 0.2347, "step": 10665, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9375, "success_rate.epoch.env.logic": 0.9126984126984127, "success_rate.epoch.env.math": 0.9792243767313019, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8415741675075681, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717830010711165, "success_rate.epoch.global": 0.9056603773584906, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992861675126904, "tokens_p.mean_in_band": 0.7742745535714286, "tokens_rate.above_band": 0.9911949685534591, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00880503144654088 }, { "epoch": 2.273114614401363, "grad_norm": 176.22258092123795, "learning_rate": 3.5257855496308974e-07, "loss": 0.1842, "step": 10670, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.8985507246376812, "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9129287598944591, "success_rate.epoch.env.math": 0.9793388429752066, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8420523138832998, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8719018607019996, "success_rate.epoch.global": 0.9060297572435395, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980221518987342, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9916317991631799, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008368200836820083 }, { "epoch": 2.274179804005113, "grad_norm": 85.51227995699642, "learning_rate": 3.525450028174415e-07, "loss": 0.2446, "step": 10675, "success_rate.epoch.env.abd": 0.9913793103448276, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9131578947368421, "success_rate.epoch.env.math": 0.9794238683127572, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8418418418418419, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707443379033705, "success_rate.epoch.global": 0.905616224648986, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9966643258426966, "tokens_p.mean_in_band": 0.6833767361111112, "tokens_rate.above_band": 0.9081632653061225, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09183673469387756 }, { "epoch": 2.2752449936088626, "grad_norm": 22.846849457120204, "learning_rate": 3.525114561049416e-07, "loss": 0.306, "step": 10680, "success_rate.epoch.env.abd": 0.9914529914529915, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9131578947368421, "success_rate.epoch.env.math": 0.9794801641586868, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8427860696517413, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708874020182336, "success_rate.epoch.global": 0.905982905982906, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961180124223602, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9877300613496932, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012269938650306749 }, { "epoch": 2.276310183212612, "grad_norm": 25.147806274700965, "learning_rate": 3.5247791485003874e-07, "loss": 0.2724, "step": 10685, "success_rate.epoch.env.abd": 0.9915254237288136, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9131578947368421, "success_rate.epoch.env.math": 0.9782608695652174, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8424182358771061, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707497023734806, "success_rate.epoch.global": 0.9055727554179567, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9886098130841121, "tokens_p.mean_in_band": 0.6296142578125, "tokens_rate.above_band": 0.9145299145299145, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08547008547008547 }, { "epoch": 2.2773753728163615, "grad_norm": 116.74880959657163, "learning_rate": 3.524443790771774e-07, "loss": 0.2487, "step": 10690, "success_rate.epoch.env.abd": 0.9916666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9140625, "success_rate.epoch.env.math": 0.9782903663500678, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.841897233201581, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708000971289658, "success_rate.epoch.global": 0.9055512721665382, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969031531531531, "tokens_p.mean_in_band": 0.6125, "tokens_rate.above_band": 0.9568965517241379, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04310344827586207 }, { "epoch": 2.2784405624201107, "grad_norm": 557.7708945851317, "learning_rate": 3.524108488107984e-07, "loss": 0.1765, "step": 10695, "success_rate.epoch.env.abd": 0.9916666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9145077720207254, "success_rate.epoch.env.math": 0.977027027027027, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8415354330708661, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707370959911661, "success_rate.epoch.global": 0.9051459293394777, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8541666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9949564873417721, "tokens_p.mean_in_band": 0.50421142578125, "tokens_rate.above_band": 0.9294117647058824, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07058823529411765 }, { "epoch": 2.2795057520238604, "grad_norm": 37.39371352089454, "learning_rate": 3.523773240753382e-07, "loss": 0.2124, "step": 10700, "success_rate.epoch.env.abd": 0.9834710743801653, "success_rate.epoch.env.agentgym:alfworld": 0.8857142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.937984496124031, "success_rate.epoch.env.logic": 0.9123711340206185, "success_rate.epoch.env.math": 0.9771197846567967, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8411764705882353, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869773601887578, "success_rate.epoch.global": 0.9043611323641928, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9855769230769231, "tokens_p.mean_in_band": 0.7723137842465754, "tokens_rate.above_band": 0.7517006802721088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.24829931972789115 }, { "epoch": 2.2805709416276096, "grad_norm": 290.3568710654841, "learning_rate": 3.5234380489522936e-07, "loss": 0.2414, "step": 10705, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.9125964010282777, "success_rate.epoch.env.math": 0.9771197846567967, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.841796875, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8702068818830092, "success_rate.epoch.global": 0.9047256097560976, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984342379958246, "tokens_p.mean_in_band": 0.7799479166666666, "tokens_rate.above_band": 0.9937759336099585, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006224066390041493 }, { "epoch": 2.2816361312313593, "grad_norm": 209.5976528695828, "learning_rate": 3.5231029129490056e-07, "loss": 0.1927, "step": 10710, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.9102564102564102, "success_rate.epoch.env.math": 0.9772117962466488, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8425655976676385, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700724040180657, "success_rate.epoch.global": 0.904707668944571, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961189516129032, "tokens_p.mean_in_band": 0.4803059895833333, "tokens_rate.above_band": 0.9627329192546584, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037267080745341616 }, { "epoch": 2.2827013208351086, "grad_norm": 90.31931724893175, "learning_rate": 3.522767832987762e-07, "loss": 0.2834, "step": 10715, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9384615384615385, "success_rate.epoch.env.logic": 0.907928388746803, "success_rate.epoch.env.math": 0.9772423025435074, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8416988416988417, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697847430015616, "success_rate.epoch.global": 0.9039334341906202, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9965277777777778, "tokens_p.mean_in_band": 0.591796875, "tokens_rate.above_band": 0.9402985074626866, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05970149253731343 }, { "epoch": 2.2837665104388583, "grad_norm": 81.28878968548122, "learning_rate": 3.5224328093127664e-07, "loss": 0.1832, "step": 10720, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9393939393939394, "success_rate.epoch.env.logic": 0.9056122448979592, "success_rate.epoch.env.math": 0.9773936170212766, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8420038535645472, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697004324040191, "success_rate.epoch.global": 0.9039186134137152, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985672242874845, "tokens_p.mean_in_band": 0.525, "tokens_rate.above_band": 0.9584323040380047, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04156769596199525 }, { "epoch": 2.2848317000426075, "grad_norm": 78.74146652947383, "learning_rate": 3.5220978421681827e-07, "loss": 0.1419, "step": 10725, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.905852417302799, "success_rate.epoch.env.math": 0.9762219286657859, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8423076923076923, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8685915861315654, "success_rate.epoch.global": 0.9035285285285285, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9993388575458392, "tokens_p.mean_below_band": 7.566995918750763e-10, "tokens_p.mean_in_band": 0.6609375, "tokens_rate.above_band": 0.9916083916083916, "tokens_rate.below_band": 0.0013986013986013986, "tokens_rate.in_band": 0.006993006993006993 }, { "epoch": 2.285896889646357, "grad_norm": 12.344528293430853, "learning_rate": 3.521762931798131e-07, "loss": 0.1707, "step": 10730, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.95, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9065656565656566, "success_rate.epoch.env.math": 0.9762845849802372, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.842911877394636, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688554180115176, "success_rate.epoch.global": 0.9038893044128646, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9938979289940828, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9825581395348837, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01744186046511628 }, { "epoch": 2.2869620792501064, "grad_norm": 148.8198533793192, "learning_rate": 3.521428078446693e-07, "loss": 0.1666, "step": 10735, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.907035175879397, "success_rate.epoch.env.math": 0.9763469119579501, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8435114503816794, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691866454663366, "success_rate.epoch.global": 0.90424739195231, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0010911312849162, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.9972144846796658, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002785515320334262 }, { "epoch": 2.288027268853856, "grad_norm": 188.99532954003726, "learning_rate": 3.521093282357906e-07, "loss": 0.2501, "step": 10740, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.905, "success_rate.epoch.env.math": 0.9764089121887287, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8444022770398482, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8690882501035685, "success_rate.epoch.global": 0.9042316258351893, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948961318051576, "tokens_p.mean_in_band": 0.46890625, "tokens_rate.above_band": 0.9654218533886584, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034578146611341634 }, { "epoch": 2.2890924584576053, "grad_norm": 189.05306246799432, "learning_rate": 3.520758543775769e-07, "loss": 0.1519, "step": 10745, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9057071960297767, "success_rate.epoch.env.math": 0.9765319426336375, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8438978240302744, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691178658731242, "success_rate.epoch.global": 0.904215976331361, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9934593023255814, "tokens_p.mean_in_band": 0.4396701388888889, "tokens_rate.above_band": 0.8269230769230769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17307692307692307 }, { "epoch": 2.290157648061355, "grad_norm": 199.9336574714201, "learning_rate": 3.520423862944235e-07, "loss": 0.2702, "step": 10750, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.905940594059406, "success_rate.epoch.env.math": 0.9766233766233766, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8447789275634995, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8692274963779053, "success_rate.epoch.global": 0.9045689019896831, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9937015503875969, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.2912228376651043, "grad_norm": 252.97441239856076, "learning_rate": 3.520089240107218e-07, "loss": 0.1689, "step": 10755, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.905940594059406, "success_rate.epoch.env.math": 0.9767141009055628, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8448598130841122, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869243097269069, "success_rate.epoch.global": 0.9045521292217328, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9923349056603774, "tokens_p.mean_below_band": 4.6798959374427795e-08, "tokens_p.mean_in_band": 0.798828125, "tokens_rate.above_band": 0.9724770642201835, "tokens_rate.below_band": 0.009174311926605505, "tokens_rate.in_band": 0.01834862385321101 }, { "epoch": 2.292288027268854, "grad_norm": 37.7307893222106, "learning_rate": 3.5197546755085885e-07, "loss": 0.2123, "step": 10760, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9064039408866995, "success_rate.epoch.env.math": 0.9767741935483871, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.845724907063197, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8693693275826327, "success_rate.epoch.global": 0.9049012435991222, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9942602040816326, "tokens_p.mean_in_band": 0.7958984375, "tokens_rate.above_band": 0.9607843137254902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0392156862745098 }, { "epoch": 2.293353216872603, "grad_norm": 95.4721040616205, "learning_rate": 3.519420169392175e-07, "loss": 0.2353, "step": 10765, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9066339066339066, "success_rate.epoch.env.math": 0.9768637532133676, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8464384828862165, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8695193625995258, "success_rate.epoch.global": 0.9052478134110787, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964398734177216, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9813664596273292, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018633540372670808 }, { "epoch": 2.294418406476353, "grad_norm": 380.40758038516884, "learning_rate": 3.5190857220017627e-07, "loss": 0.1563, "step": 10770, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9070904645476773, "success_rate.epoch.env.math": 0.9769230769230769, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8471454880294659, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696422643290526, "success_rate.epoch.global": 0.90559186637618, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9940878378378378, "tokens_p.mean_in_band": 0.6953125, "tokens_rate.above_band": 0.9736842105263158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02631578947368421 }, { "epoch": 2.295483596080102, "grad_norm": 111.0059881001226, "learning_rate": 3.518751333581095e-07, "loss": 0.1495, "step": 10775, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.9506172839506173, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9398496240601504, "success_rate.epoch.env.logic": 0.9073170731707317, "success_rate.epoch.env.math": 0.9770700636942675, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8477064220183487, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697272215457006, "success_rate.epoch.global": 0.9059334298118669, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946428571428572, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.296548785683852, "grad_norm": 112.71082779838599, "learning_rate": 3.5184170043738715e-07, "loss": 0.2298, "step": 10780, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8904109589041096, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9402985074626866, "success_rate.epoch.env.logic": 0.9075425790754258, "success_rate.epoch.env.math": 0.97712833545108, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8474885844748858, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8698287717061628, "success_rate.epoch.global": 0.9059120403749099, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0004050925925927, "tokens_p.mean_in_band": 0.6734375, "tokens_rate.above_band": 0.9953917050691244, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004608294930875576 }, { "epoch": 2.297613975287601, "grad_norm": 211.86203830332747, "learning_rate": 3.518082734623749e-07, "loss": 0.244, "step": 10785, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8783783783783784, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.9075425790754258, "success_rate.epoch.env.math": 0.9771573604060914, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8472727272727273, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688367643589349, "success_rate.epoch.global": 0.9055316091954023, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943306510607169, "tokens_p.mean_in_band": 0.7763247282608695, "tokens_rate.above_band": 0.983453237410072, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016546762589928057 }, { "epoch": 2.2986791648913507, "grad_norm": 98.98032406920781, "learning_rate": 3.5177485245743406e-07, "loss": 0.2516, "step": 10790, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8783783783783784, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.9075425790754258, "success_rate.epoch.env.math": 0.9760705289672544, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8469202898550725, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8687059217356176, "success_rate.epoch.global": 0.9051539012168933, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7916666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9908854166666666, "tokens_p.mean_in_band": 0.6825657894736842, "tokens_rate.above_band": 0.8347826086956521, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16521739130434782 }, { "epoch": 2.2997443544951, "grad_norm": 43.41128327043323, "learning_rate": 3.5174143744692166e-07, "loss": 0.3017, "step": 10795, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8783783783783784, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9523809523809523, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.9079903147699758, "success_rate.epoch.env.math": 0.9761306532663316, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8477477477477477, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8688273142707359, "success_rate.epoch.global": 0.9054921540656206, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.7291666666666666, "tokens_rate.above_band": 0.963855421686747, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03614457831325301 }, { "epoch": 2.3008095440988496, "grad_norm": 325.84587311815767, "learning_rate": 3.517080284551903e-07, "loss": 0.2381, "step": 10800, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9416058394160584, "success_rate.epoch.env.logic": 0.9082125603864735, "success_rate.epoch.env.math": 0.9761904761904762, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8472596585804133, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691643222058645, "success_rate.epoch.global": 0.9054726368159204, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9523809523809523, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991143724696356, "tokens_p.mean_in_band": 0.6583806818181818, "tokens_rate.above_band": 0.9782178217821782, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02178217821782178 }, { "epoch": 2.301874733702599, "grad_norm": 57.590187926664065, "learning_rate": 3.5167462550658814e-07, "loss": 0.181, "step": 10805, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9420289855072463, "success_rate.epoch.env.logic": 0.908433734939759, "success_rate.epoch.env.math": 0.97625, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8479427549194991, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8693017699485952, "success_rate.epoch.global": 0.9058073654390935, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949389460154242, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9873096446700508, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012690355329949238 }, { "epoch": 2.3029399233063486, "grad_norm": 94.29949364953272, "learning_rate": 3.516412286254592e-07, "loss": 0.2183, "step": 10810, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9420289855072463, "success_rate.epoch.env.logic": 0.9088729016786571, "success_rate.epoch.env.math": 0.9763092269326683, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.848349687778769, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8695405872946228, "success_rate.epoch.global": 0.9061397318278053, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975507554296507, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9990566037735849, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0009433962264150943 }, { "epoch": 2.3040051129100982, "grad_norm": 270.6706210666898, "learning_rate": 3.516078378361427e-07, "loss": 0.1295, "step": 10815, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.9093078758949881, "success_rate.epoch.env.math": 0.976456009913259, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8486197684772929, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869655941435865, "success_rate.epoch.global": 0.9064697609001406, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985849056603774, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.3050703025138475, "grad_norm": 158.42480524875137, "learning_rate": 3.515744531629737e-07, "loss": 0.2727, "step": 10820, "success_rate.epoch.env.abd": 0.9844961240310077, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9529411764705882, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.9097387173396675, "success_rate.epoch.env.math": 0.9765432098765432, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8490230905861457, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869761897712582, "success_rate.epoch.global": 0.9067974772249474, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992236024844721, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.3061354921175967, "grad_norm": 162.36449691438347, "learning_rate": 3.515410746302827e-07, "loss": 0.2156, "step": 10825, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9424460431654677, "success_rate.epoch.env.logic": 0.909952606635071, "success_rate.epoch.env.math": 0.9766009852216748, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8496905393457118, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699078585859379, "success_rate.epoch.global": 0.9071229050279329, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965062111801242, "tokens_p.mean_in_band": 0.6981026785714286, "tokens_rate.above_band": 0.9583333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041666666666666664 }, { "epoch": 2.3072006817213464, "grad_norm": 209.46721697374392, "learning_rate": 3.515077022623957e-07, "loss": 0.2887, "step": 10830, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9428571428571428, "success_rate.epoch.env.logic": 0.910377358490566, "success_rate.epoch.env.math": 0.9766584766584766, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8493392070484581, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699678086312846, "success_rate.epoch.global": 0.907098121085595, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99755859375, "tokens_p.mean_in_band": 0.4765625, "tokens_rate.above_band": 0.9696969696969697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030303030303030304 }, { "epoch": 2.308265871325096, "grad_norm": 292.374860147934, "learning_rate": 3.5147433608363425e-07, "loss": 0.433, "step": 10835, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9105882352941177, "success_rate.epoch.env.math": 0.9767441860465116, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8488576449912126, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700599745706612, "success_rate.epoch.global": 0.9070735090152566, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968282029950083, "tokens_p.mean_in_band": 0.46268136160714285, "tokens_rate.above_band": 0.9980625518959314, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0019374481040686411 }, { "epoch": 2.3093310609288453, "grad_norm": 53.31293918115055, "learning_rate": 3.5144097611831544e-07, "loss": 0.1717, "step": 10840, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9112149532710281, "success_rate.epoch.env.math": 0.976857490864799, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8491228070175438, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701513550089821, "success_rate.epoch.global": 0.907394609536973, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976635514018691, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.3103962505325946, "grad_norm": 265.3463427271913, "learning_rate": 3.5140762239075165e-07, "loss": 0.1828, "step": 10845, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9118329466357309, "success_rate.epoch.env.math": 0.9769696969696969, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8486439195100612, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701742015509928, "success_rate.epoch.global": 0.9073691460055097, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.6712239583333334, "tokens_rate.above_band": 0.9428571428571428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05714285714285714 }, { "epoch": 2.3114614401363442, "grad_norm": 1020.7716963288548, "learning_rate": 3.5137427492525104e-07, "loss": 0.2734, "step": 10850, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.881578947368421, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9122401847575058, "success_rate.epoch.env.math": 0.9770531400966184, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8491717523975588, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704103344150489, "success_rate.epoch.global": 0.9076870281400137, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996733234714004, "tokens_p.mean_in_band": 0.7421875, "tokens_rate.above_band": 0.9960707269155207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003929273084479371 }, { "epoch": 2.312526629740094, "grad_norm": 24.902540009846284, "learning_rate": 3.5134093374611677e-07, "loss": 0.1761, "step": 10855, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.881578947368421, "success_rate.epoch.env.agentgym:sciworld": 0.9540229885057471, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9124423963133641, "success_rate.epoch.env.math": 0.9771084337349397, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8490893321769297, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704748527228919, "success_rate.epoch.global": 0.9076607387140903, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996284965034965, "tokens_p.mean_in_band": 0.64609375, "tokens_rate.above_band": 0.9662162162162162, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033783783783783786 }, { "epoch": 2.313591819343843, "grad_norm": 139.46393070170106, "learning_rate": 3.513075988776478e-07, "loss": 0.3364, "step": 10860, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9128440366972477, "success_rate.epoch.env.math": 0.9760191846522782, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8492201039861352, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707477678499008, "success_rate.epoch.global": 0.9076346284935242, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993605047748977, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9892037786774629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010796221322537112 }, { "epoch": 2.314657008947593, "grad_norm": 81.24879969062685, "learning_rate": 3.512742703441383e-07, "loss": 0.3067, "step": 10865, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9136363636363637, "success_rate.epoch.env.math": 0.9749403341288783, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8486159169550173, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8706667941575915, "success_rate.epoch.global": 0.9072690217391305, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9996019108280255, "tokens_p.mean_in_band": 0.4296875, "tokens_rate.above_band": 0.9781931464174455, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021806853582554516 }, { "epoch": 2.315722198551342, "grad_norm": 239.88705770247424, "learning_rate": 3.512409481698778e-07, "loss": 0.1198, "step": 10870, "success_rate.epoch.env.abd": 0.9847328244274809, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9140271493212669, "success_rate.epoch.env.math": 0.9750297265160524, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8481449525452976, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700734546984953, "success_rate.epoch.global": 0.9069058903182126, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983766233766234, "tokens_p.mean_in_band": 0.5870535714285714, "tokens_rate.above_band": 0.9166666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08333333333333333 }, { "epoch": 2.3167873881550918, "grad_norm": 187.17945962524766, "learning_rate": 3.512076323791515e-07, "loss": 0.2214, "step": 10875, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9440559440559441, "success_rate.epoch.env.logic": 0.9144144144144144, "success_rate.epoch.env.math": 0.9750889679715302, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8469475494411006, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700621355253367, "success_rate.epoch.global": 0.9065452091767882, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970657276995305, "tokens_p.mean_in_band": 0.4659090909090909, "tokens_rate.above_band": 0.9508928571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049107142857142856 }, { "epoch": 2.317852577758841, "grad_norm": 391.9682440873458, "learning_rate": 3.511743229962394e-07, "loss": 0.2438, "step": 10880, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8846153846153846, "success_rate.epoch.env.agentgym:sciworld": 0.9550561797752809, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.9146067415730337, "success_rate.epoch.env.math": 0.9752650176678446, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8472103004291845, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701548290009292, "success_rate.epoch.global": 0.9068594485541358, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9697732997481109, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030226700251889168 }, { "epoch": 2.3189177673625907, "grad_norm": 121.18918070220326, "learning_rate": 3.5114102004541727e-07, "loss": 0.2787, "step": 10885, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9444444444444444, "success_rate.epoch.env.logic": 0.9146067415730337, "success_rate.epoch.env.math": 0.9753521126760564, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.847008547008547, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703225819208888, "success_rate.epoch.global": 0.9068364611260054, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990552325581395, "tokens_p.mean_in_band": 0.2835286458333333, "tokens_rate.above_band": 0.9862385321100917, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013761467889908258 }, { "epoch": 2.31998295696634, "grad_norm": 164.42361707192023, "learning_rate": 3.5110772355095615e-07, "loss": 0.2019, "step": 10890, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8860759493670886, "success_rate.epoch.env.agentgym:sciworld": 0.9555555555555556, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9149888143176734, "success_rate.epoch.env.math": 0.9754098360655737, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.8466780238500852, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703673468970841, "success_rate.epoch.global": 0.906813627254509, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988704819277109, "tokens_p.mean_in_band": 0.5926339285714286, "tokens_rate.above_band": 0.9861386138613861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013861386138613862 }, { "epoch": 2.3210481465700896, "grad_norm": 266.8056800521331, "learning_rate": 3.5107443353712214e-07, "loss": 0.3655, "step": 10895, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9560439560439561, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9151785714285714, "success_rate.epoch.env.math": 0.9754672897196262, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8463497453310697, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700022041835269, "success_rate.epoch.global": 0.9064580559254327, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983734772978959, "tokens_p.mean_in_band": 0.5698784722222222, "tokens_rate.above_band": 0.9804560260586319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019543973941368076 }, { "epoch": 2.322113336173839, "grad_norm": 104.4113513988998, "learning_rate": 3.5104115002817686e-07, "loss": 0.3625, "step": 10900, "success_rate.epoch.env.abd": 0.9849624060150376, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9565217391304348, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9131403118040089, "success_rate.epoch.env.math": 0.9755529685681025, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8460236886632826, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869848846620723, "success_rate.epoch.global": 0.9061048440610484, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968522451456311, "tokens_p.mean_in_band": 0.49973060344827586, "tokens_rate.above_band": 0.9660023446658851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03399765533411489 }, { "epoch": 2.3231785257775885, "grad_norm": 313.4408587656172, "learning_rate": 3.5100787304837705e-07, "loss": 0.287, "step": 10905, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.956989247311828, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9133333333333333, "success_rate.epoch.env.math": 0.9756380510440835, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8465430016863407, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699740417171223, "success_rate.epoch.global": 0.906415343915344, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983198924731183, "tokens_p.mean_in_band": 0.765625, "tokens_rate.above_band": 0.9841269841269841, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015873015873015872 }, { "epoch": 2.324243715381338, "grad_norm": 142.3907490921456, "learning_rate": 3.509746026219748e-07, "loss": 0.2068, "step": 10910, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.956989247311828, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9135254988913526, "success_rate.epoch.env.math": 0.9756944444444444, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8464765100671141, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700006440497283, "success_rate.epoch.global": 0.9063941990771259, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99625, "tokens_p.mean_in_band": 0.2611607142857143, "tokens_rate.above_band": 0.9345794392523364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06542056074766354 }, { "epoch": 2.3253089049850875, "grad_norm": 424.1618384504015, "learning_rate": 3.5094133877321723e-07, "loss": 0.2341, "step": 10915, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9448275862068966, "success_rate.epoch.env.logic": 0.9137168141592921, "success_rate.epoch.env.math": 0.9756944444444444, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8475, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701526773587861, "success_rate.epoch.global": 0.9067017082785808, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951171875, "tokens_p.mean_in_band": 0.8583984375, "tokens_rate.above_band": 0.9795918367346939, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02040816326530612 }, { "epoch": 2.3263740945888367, "grad_norm": 20.68506087550506, "learning_rate": 3.5090808152634684e-07, "loss": 0.1204, "step": 10920, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9452054794520548, "success_rate.epoch.env.logic": 0.9146608315098468, "success_rate.epoch.env.math": 0.9757785467128027, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8476269775187344, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8702920401208594, "success_rate.epoch.global": 0.9070072036673216, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977558348294434, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9982078853046595, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0017921146953405018 }, { "epoch": 2.3274392841925864, "grad_norm": 38.27788884088602, "learning_rate": 3.508748309056014e-07, "loss": 0.24, "step": 10925, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9545454545454546, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9452054794520548, "success_rate.epoch.env.logic": 0.9148471615720524, "success_rate.epoch.env.math": 0.9746835443037974, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8485099337748344, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870289702294432, "success_rate.epoch.global": 0.9069843342036553, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9894859813084113, "tokens_p.mean_in_band": 0.376953125, "tokens_rate.above_band": 0.9907407407407407, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009259259259259259 }, { "epoch": 2.3285044737963356, "grad_norm": 62.60312562236735, "learning_rate": 3.5084158693521354e-07, "loss": 0.2694, "step": 10930, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9578947368421052, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9455782312925171, "success_rate.epoch.env.logic": 0.9128540305010894, "success_rate.epoch.env.math": 0.9747126436781609, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8491343775762572, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704221911000442, "success_rate.epoch.global": 0.9069616135328562, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980552575107297, "tokens_p.mean_in_band": 0.679296875, "tokens_rate.above_band": 0.9789915966386554, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02100840336134454 }, { "epoch": 2.3295696634000853, "grad_norm": 103.45720820916355, "learning_rate": 3.5080834963941135e-07, "loss": 0.1351, "step": 10935, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9455782312925171, "success_rate.epoch.env.logic": 0.9136069114470843, "success_rate.epoch.env.math": 0.9747706422018348, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8495065789473685, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705696163120448, "success_rate.epoch.global": 0.9072632944228275, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965722120658135, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.3306348530038346, "grad_norm": 234.33541657364728, "learning_rate": 3.507751190424178e-07, "loss": 0.1285, "step": 10940, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9459459459459459, "success_rate.epoch.env.logic": 0.9141630901287554, "success_rate.epoch.env.math": 0.9748283752860412, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.8498769483182936, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8707024279713272, "success_rate.epoch.global": 0.907563025210084, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960403726708075, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9926017262638718, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007398273736128237 }, { "epoch": 2.3317000426075842, "grad_norm": 170.70696067579433, "learning_rate": 3.5074189516845126e-07, "loss": 0.3629, "step": 10945, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9587628865979382, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9463087248322147, "success_rate.epoch.env.logic": 0.9141630901287554, "success_rate.epoch.env.math": 0.9748858447488584, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8503679476696647, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703556084510801, "success_rate.epoch.global": 0.9075386597938144, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0001905487804879, "tokens_p.mean_in_band": 0.5772569444444444, "tokens_rate.above_band": 0.9785202863961814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021479713603818614 }, { "epoch": 2.3327652322113335, "grad_norm": 228.38892361976727, "learning_rate": 3.50708678041725e-07, "loss": 0.2608, "step": 10950, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9143468950749465, "success_rate.epoch.env.math": 0.9738339021615472, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.850040749796251, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703177351048409, "success_rate.epoch.global": 0.9071933204881182, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8833333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953013126491647, "tokens_p.mean_in_band": 0.69296875, "tokens_rate.above_band": 0.9654377880184332, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03456221198156682 }, { "epoch": 2.333830421815083, "grad_norm": 198.01043081154796, "learning_rate": 3.5067546768644745e-07, "loss": 0.1753, "step": 10955, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.8875, "success_rate.epoch.env.agentgym:sciworld": 0.9595959595959596, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9145299145299145, "success_rate.epoch.env.math": 0.9739819004524887, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8502847843775427, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704074931460929, "success_rate.epoch.global": 0.9074903969270166, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989475388601037, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.9974160206718347, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002583979328165375 }, { "epoch": 2.3348956114188324, "grad_norm": 35.31957113571333, "learning_rate": 3.50642264126822e-07, "loss": 0.1807, "step": 10960, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.9595959595959596, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9148936170212766, "success_rate.epoch.env.math": 0.9740698985343855, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8499594484995945, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705452434719292, "success_rate.epoch.global": 0.9074664964901085, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968519656019657, "tokens_p.mean_in_band": 0.6315104166666666, "tokens_rate.above_band": 0.9926829268292683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007317073170731708 }, { "epoch": 2.335960801022582, "grad_norm": 48.7135950432769, "learning_rate": 3.506090673870472e-07, "loss": 0.2937, "step": 10965, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9152542372881356, "success_rate.epoch.env.math": 0.9741282339707537, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8504446240905417, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8706737851283848, "success_rate.epoch.global": 0.9077608142493638, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992917847025495, "tokens_p.mean_in_band": 0.8190104166666666, "tokens_rate.above_band": 0.9915730337078652, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008426966292134831 }, { "epoch": 2.3370259906263313, "grad_norm": 50.60756611662624, "learning_rate": 3.505758774913167e-07, "loss": 0.2169, "step": 10970, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9466666666666667, "success_rate.epoch.env.logic": 0.9135021097046413, "success_rate.epoch.env.math": 0.9741863075196409, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8509266720386784, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705824245148396, "success_rate.epoch.global": 0.9077362079898541, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992954911433173, "tokens_p.mean_in_band": 0.48408564814814814, "tokens_rate.above_band": 0.9583333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041666666666666664 }, { "epoch": 2.338091180230081, "grad_norm": 176.03274686441375, "learning_rate": 3.5054269446381877e-07, "loss": 0.3998, "step": 10975, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9138655462184874, "success_rate.epoch.env.math": 0.9731843575418995, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8512861736334405, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705891689706459, "success_rate.epoch.global": 0.9077117572692794, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978830645161291, "tokens_p.mean_in_band": 0.5849494934082031, "tokens_rate.above_band": 0.9951845906902087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004815409309791332 }, { "epoch": 2.3391563698338302, "grad_norm": 110.65938234443695, "learning_rate": 3.505095183287373e-07, "loss": 0.328, "step": 10980, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9142259414225942, "success_rate.epoch.env.math": 0.9732739420935412, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8509615384615384, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8658158749375985, "success_rate.epoch.global": 0.9073724007561437, "success_rate.window.env.babyai": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9910201149425287, "tokens_p.mean_below_band": 7.566995918750763e-10, "tokens_p.mean_in_band": 0.709228515625, "tokens_rate.above_band": 0.9666666666666667, "tokens_rate.below_band": 0.003703703703703704, "tokens_rate.in_band": 0.02962962962962963 }, { "epoch": 2.34022155943758, "grad_norm": 98.01721192373007, "learning_rate": 3.504763491102506e-07, "loss": 0.2004, "step": 10985, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8888888888888888, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9470198675496688, "success_rate.epoch.env.logic": 0.9145833333333333, "success_rate.epoch.env.math": 0.9733924611973392, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8514376996805112, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8659024269588266, "success_rate.epoch.global": 0.9076633165829145, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953947368421052, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.979381443298969, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020618556701030927 }, { "epoch": 2.341286749041329, "grad_norm": 68.84285969692594, "learning_rate": 3.504431868325323e-07, "loss": 0.3017, "step": 10990, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8795180722891566, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9149377593360996, "success_rate.epoch.env.math": 0.9733924611973392, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8520286396181385, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8651681635809744, "success_rate.epoch.global": 0.9076393237319975, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986053719008264, "tokens_p.mean_in_band": 0.6766183035714286, "tokens_rate.above_band": 0.9942481511914544, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005751848808545604 }, { "epoch": 2.342351938645079, "grad_norm": 341.44705935430034, "learning_rate": 3.5041003151975067e-07, "loss": 0.1903, "step": 10995, "success_rate.epoch.env.abd": 0.9858156028368794, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.9603960396039604, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9151138716356108, "success_rate.epoch.env.math": 0.9734806629834254, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8523809523809524, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8653998265117189, "success_rate.epoch.global": 0.9079275905118602, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985576923076923, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9908536585365854, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009146341463414634 }, { "epoch": 2.343417128248828, "grad_norm": 31.547450500014687, "learning_rate": 3.5037688319606924e-07, "loss": 0.2906, "step": 11000, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9154639175257732, "success_rate.epoch.env.math": 0.9735099337748344, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8527315914489311, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8656468422637439, "success_rate.epoch.global": 0.9082140634723086, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973268839103869, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9979674796747967, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0020325203252032522 }, { "epoch": 2.3444823178525778, "grad_norm": 256.4405931443873, "learning_rate": 3.503437418856461e-07, "loss": 0.168, "step": 11005, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9477124183006536, "success_rate.epoch.env.logic": 0.9156378600823045, "success_rate.epoch.env.math": 0.973568281938326, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8525236593059937, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8656891587112198, "success_rate.epoch.global": 0.9081885856079405, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964511041009464, "tokens_p.mean_in_band": 0.6907552083333334, "tokens_rate.above_band": 0.990625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009375 }, { "epoch": 2.3455475074563275, "grad_norm": 144.97709738464556, "learning_rate": 3.503106076126346e-07, "loss": 0.3379, "step": 11010, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8823529411764706, "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9483870967741935, "success_rate.epoch.env.logic": 0.9158110882956879, "success_rate.epoch.env.math": 0.9725877192982456, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8519685039370078, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8656612426654067, "success_rate.epoch.global": 0.9078540507111935, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9997905027932961, "tokens_p.mean_in_band": 0.5681423611111112, "tokens_rate.above_band": 0.9900442477876106, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00995575221238938 }, { "epoch": 2.3466126970600767, "grad_norm": 146.60794017629738, "learning_rate": 3.5027748040118255e-07, "loss": 0.2192, "step": 11015, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8735632183908046, "success_rate.epoch.env.agentgym:sciworld": 0.9611650485436893, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9158110882956879, "success_rate.epoch.env.math": 0.9726177437020811, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8526645768025078, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649582632497179, "success_rate.epoch.global": 0.907829839704069, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998989898989899, "tokens_p.mean_in_band": 0.8017578125, "tokens_rate.above_band": 0.9939759036144579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006024096385542169 }, { "epoch": 2.347677886663826, "grad_norm": 120.57955067475064, "learning_rate": 3.502443602754329e-07, "loss": 0.2976, "step": 11020, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8735632183908046, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9490445859872612, "success_rate.epoch.env.logic": 0.9144602851323829, "success_rate.epoch.env.math": 0.972707423580786, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8527799530148786, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649177453580684, "success_rate.epoch.global": 0.9078057775046097, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983258928571429, "tokens_p.mean_in_band": 0.630859375, "tokens_rate.above_band": 0.9767441860465116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023255813953488372 }, { "epoch": 2.3487430762675756, "grad_norm": 264.0727461488841, "learning_rate": 3.502112472595234e-07, "loss": 0.3691, "step": 11025, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8735632183908046, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9148073022312373, "success_rate.epoch.env.math": 0.9728260869565217, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.85234375, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8649497435471634, "success_rate.epoch.global": 0.9077818627450981, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958479020979021, "tokens_p.mean_in_band": 0.6197916666666666, "tokens_rate.above_band": 0.9694915254237289, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030508474576271188 }, { "epoch": 2.3498082658713253, "grad_norm": 32.628655459982646, "learning_rate": 3.501781413775866e-07, "loss": 0.2459, "step": 11030, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9615384615384616, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9493670886075949, "success_rate.epoch.env.logic": 0.9151515151515152, "success_rate.epoch.env.math": 0.972885032537961, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8520249221183801, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640636763931879, "success_rate.epoch.global": 0.907452657299939, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980177238805971, "tokens_p.mean_in_band": 0.5654296875, "tokens_rate.above_band": 0.9852941176470589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014705882352941176 }, { "epoch": 2.3508734554750745, "grad_norm": 76.97015092120746, "learning_rate": 3.501450426537497e-07, "loss": 0.1713, "step": 11035, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9153225806451613, "success_rate.epoch.env.math": 0.9729437229437229, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8521400778210116, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642311621784935, "success_rate.epoch.global": 0.9077344701583435, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988290398126464, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.9984411535463756, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001558846453624318 }, { "epoch": 2.351938645078824, "grad_norm": 247.05306713436704, "learning_rate": 3.5011195111213497e-07, "loss": 0.3353, "step": 11040, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9473684210526315, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9154929577464789, "success_rate.epoch.env.math": 0.9730312837108953, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.8520526723470179, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642466651234476, "success_rate.epoch.global": 0.9077109896782027, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956487341772152, "tokens_p.mean_in_band": 0.55625, "tokens_rate.above_band": 0.8876404494382022, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11235955056179775 }, { "epoch": 2.3530038346825735, "grad_norm": 66.73765785891717, "learning_rate": 3.5007886677685916e-07, "loss": 0.1328, "step": 11045, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.914, "success_rate.epoch.env.math": 0.9731471535982814, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.851508120649652, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8643112050680757, "success_rate.epoch.global": 0.9073849878934624, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958053691275168, "tokens_p.mean_in_band": 0.6730324074074074, "tokens_rate.above_band": 0.9430379746835443, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056962025316455694 }, { "epoch": 2.354069024286323, "grad_norm": 168.69128030927538, "learning_rate": 3.50045789672034e-07, "loss": 0.1824, "step": 11050, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9146825396825397, "success_rate.epoch.env.math": 0.973175965665236, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.851195065535852, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639145134202384, "success_rate.epoch.global": 0.9070609535304768, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971590909090909, "tokens_p.mean_in_band": 0.5193359375, "tokens_rate.above_band": 0.8918918918918919, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10810810810810811 }, { "epoch": 2.3551342138900724, "grad_norm": 152.40827715421915, "learning_rate": 3.5001271982176575e-07, "loss": 0.1787, "step": 11055, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9503105590062112, "success_rate.epoch.env.logic": 0.9151873767258383, "success_rate.epoch.env.math": 0.9732620320855615, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.8508839354342813, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639399473622432, "success_rate.epoch.global": 0.9070397111913358, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971217105263158, "tokens_p.mean_in_band": 0.7053571428571429, "tokens_rate.above_band": 0.9156626506024096, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08433734939759036 }, { "epoch": 2.356199403493822, "grad_norm": 59.24375211064543, "learning_rate": 3.499796572501555e-07, "loss": 0.3147, "step": 11060, "success_rate.epoch.env.abd": 0.9863945578231292, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.950920245398773, "success_rate.epoch.env.logic": 0.9153543307086615, "success_rate.epoch.env.math": 0.9733475479744137, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.8505747126436781, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639902140416645, "success_rate.epoch.global": 0.9070185962807439, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994299674267101, "tokens_p.mean_in_band": 0.61875, "tokens_rate.above_band": 0.9839743589743589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016025641025641024 }, { "epoch": 2.3572645930975713, "grad_norm": 103.70876244723827, "learning_rate": 3.499466019812991e-07, "loss": 0.2315, "step": 11065, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.8651685393258427, "success_rate.epoch.env.agentgym:sciworld": 0.9619047619047619, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.950920245398773, "success_rate.epoch.env.logic": 0.916015625, "success_rate.epoch.env.math": 0.9733759318423855, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.8508033664881408, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642295895942259, "success_rate.epoch.global": 0.9072966507177034, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996482176360225, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9962616822429906, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003738317757009346 }, { "epoch": 2.358329782701321, "grad_norm": 77.01239787629495, "learning_rate": 3.4991355403928694e-07, "loss": 0.3863, "step": 11070, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.950920245398773, "success_rate.epoch.env.logic": 0.9161793372319688, "success_rate.epoch.env.math": 0.9734607218683652, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.851258581235698, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644624289928591, "success_rate.epoch.global": 0.9075730471079309, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989536830357143, "tokens_p.mean_in_band": 0.7377232142857143, "tokens_rate.above_band": 0.9846153846153847, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015384615384615385 }, { "epoch": 2.3593949723050702, "grad_norm": 12.668494255059269, "learning_rate": 3.4988051344820406e-07, "loss": 0.2475, "step": 11075, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9161793372319688, "success_rate.epoch.env.math": 0.9725158562367865, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.8515981735159818, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640491994087377, "success_rate.epoch.global": 0.9072532699167658, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972246891651865, "tokens_p.mean_in_band": 0.6430921052631579, "tokens_rate.above_band": 0.9673539518900344, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03264604810996564 }, { "epoch": 2.36046016190882, "grad_norm": 56.933228687245524, "learning_rate": 3.4984748023213027e-07, "loss": 0.218, "step": 11080, "success_rate.epoch.env.abd": 0.9867549668874173, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9161793372319688, "success_rate.epoch.env.math": 0.9726027397260274, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8521607278241091, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637569420312975, "success_rate.epoch.global": 0.9072317723770006, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958881578947368, "tokens_p.mean_in_band": 0.5427631578947368, "tokens_rate.above_band": 0.9090909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09090909090909091 }, { "epoch": 2.361525351512569, "grad_norm": 727.1168428241937, "learning_rate": 3.4981445441513994e-07, "loss": 0.2851, "step": 11085, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.916504854368932, "success_rate.epoch.env.math": 0.9727463312368972, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8523845571536715, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638278580634008, "success_rate.epoch.global": 0.9075059101654847, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973714953271028, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.362590541116319, "grad_norm": 80.2901158722856, "learning_rate": 3.4978143602130205e-07, "loss": 0.2347, "step": 11090, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9147286821705426, "success_rate.epoch.env.math": 0.9728317659352143, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8529411764705882, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637568174546147, "success_rate.epoch.global": 0.9074837949322333, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958462132921174, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.9803030303030303, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019696969696969695 }, { "epoch": 2.363655730720068, "grad_norm": 34.87870520427987, "learning_rate": 3.4974842507468016e-07, "loss": 0.2887, "step": 11095, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, "success_rate.epoch.env.agentgym:sciworld": 0.9626168224299065, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9147286821705426, "success_rate.epoch.env.math": 0.972972972972973, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8525206922498119, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638646287529952, "success_rate.epoch.global": 0.9074618096357226, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980633802816902, "tokens_p.mean_in_band": 0.5052083333333334, "tokens_rate.above_band": 0.9957924263674615, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004207573632538569 }, { "epoch": 2.3647209203238178, "grad_norm": 143.8052000119641, "learning_rate": 3.4971542159933235e-07, "loss": 0.343, "step": 11100, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.95, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9153846153846154, "success_rate.epoch.env.math": 0.9730848861283644, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8526315789473684, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639759808984567, "success_rate.epoch.global": 0.9077328646748682, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988207547169812, "tokens_p.mean_in_band": 0.873046875, "tokens_rate.above_band": 0.9814814814814815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018518518518518517 }, { "epoch": 2.365786109927567, "grad_norm": 66.84384196733302, "learning_rate": 3.496824256193114e-07, "loss": 0.1747, "step": 11105, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9153846153846154, "success_rate.epoch.env.math": 0.9732510288065843, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8528528528528528, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642354689236079, "success_rate.epoch.global": 0.9080023364485982, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984707446808511, "tokens_p.mean_in_band": 0.6671875, "tokens_rate.above_band": 0.9791666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020833333333333332 }, { "epoch": 2.3668512995313167, "grad_norm": 109.9298418509935, "learning_rate": 3.4964943715866455e-07, "loss": 0.5967, "step": 11110, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9157088122605364, "success_rate.epoch.env.math": 0.9733879222108496, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.851685393258427, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8641712535314042, "success_rate.epoch.global": 0.9076878276062901, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9914568345323741, "tokens_p.mean_in_band": 0.6556919642857143, "tokens_rate.above_band": 0.952054794520548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04794520547945205 }, { "epoch": 2.367916489135066, "grad_norm": 49.83853824283283, "learning_rate": 3.496164562414335e-07, "loss": 0.2379, "step": 11115, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.916030534351145, "success_rate.epoch.env.math": 0.9734422880490297, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8522388059701492, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8643860580677631, "success_rate.epoch.global": 0.9079558652729385, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982918006430869, "tokens_p.mean_in_band": 0.8638392857142857, "tokens_rate.above_band": 0.9779874213836478, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0220125786163522 }, { "epoch": 2.3689816787388156, "grad_norm": 127.47305892717033, "learning_rate": 3.4958348289165445e-07, "loss": 0.2766, "step": 11120, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9165085388994307, "success_rate.epoch.env.math": 0.973630831643002, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8522388059701492, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644466533534229, "success_rate.epoch.global": 0.9082223508975101, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974563953488372, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.370046868342565, "grad_norm": 233.27903982768112, "learning_rate": 3.4955051713335827e-07, "loss": 0.2593, "step": 11125, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9515151515151515, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9736575481256332, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8513011152416357, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644663073093124, "success_rate.epoch.global": 0.9079099307159353, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977851605758582, "tokens_p.mean_in_band": 0.5736177884615384, "tokens_rate.above_band": 0.9858078602620087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014192139737991267 }, { "epoch": 2.3711120579463145, "grad_norm": 76.97890279663311, "learning_rate": 3.4951755899057015e-07, "loss": 0.1438, "step": 11130, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9515151515151515, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9737638748738647, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8519615099925981, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8645360092637847, "success_rate.epoch.global": 0.9081750143926309, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9943181818181818, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9649122807017544, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03508771929824561 }, { "epoch": 2.3721772475500638, "grad_norm": 56.06022830971406, "learning_rate": 3.4948460848730966e-07, "loss": 0.1753, "step": 11135, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9168241965973535, "success_rate.epoch.env.math": 0.9738693467336683, "success_rate.epoch.env.sat": 0.08695652173913043, "success_rate.epoch.env.science": 0.8523985239852399, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8646261995532067, "success_rate.epoch.global": 0.9084385763490241, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9999078171091446, "tokens_p.mean_in_band": 0.76953125, "tokens_rate.above_band": 0.9970588235294118, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0029411764705882353 }, { "epoch": 2.3732424371538134, "grad_norm": 60.27607452460975, "learning_rate": 3.49451665647591e-07, "loss": 0.2476, "step": 11140, "success_rate.epoch.env.abd": 0.9869281045751634, "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, "success_rate.epoch.env.agentgym:sciworld": 0.954954954954955, "success_rate.epoch.env.agentgym:textcraft": 0.9565217391304348, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9169811320754717, "success_rate.epoch.env.math": 0.973973973973974, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8526160648489315, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636786569203967, "success_rate.epoch.global": 0.9081282198053806, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9990190582959642, "tokens_p.mean_in_band": 0.6414473684210527, "tokens_rate.above_band": 0.9591397849462365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04086021505376344 }, { "epoch": 2.3743076267575627, "grad_norm": 692.8649133006721, "learning_rate": 3.4941873049542254e-07, "loss": 0.2884, "step": 11145, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, "success_rate.epoch.env.agentgym:sciworld": 0.954954954954955, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9171374764595104, "success_rate.epoch.env.math": 0.9740777666999003, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8522058823529411, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638374233980962, "success_rate.epoch.global": 0.9081050228310502, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966216216216216, "tokens_p.mean_in_band": 0.62890625, "tokens_rate.above_band": 0.9847908745247148, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015209125475285171 }, { "epoch": 2.3753728163613124, "grad_norm": 36.16419467614971, "learning_rate": 3.4938580305480727e-07, "loss": 0.0947, "step": 11150, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8631578947368421, "success_rate.epoch.env.agentgym:sciworld": 0.954954954954955, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9171374764595104, "success_rate.epoch.env.math": 0.974155069582505, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8527472527472527, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863183680314303, "success_rate.epoch.global": 0.9080819578827547, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975293803418803, "tokens_p.mean_in_band": 0.4375, "tokens_rate.above_band": 0.9957446808510638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00425531914893617 }, { "epoch": 2.3764380059650616, "grad_norm": 67.95497167009935, "learning_rate": 3.4935288334974246e-07, "loss": 0.2231, "step": 11155, "success_rate.epoch.env.abd": 0.987012987012987, "success_rate.epoch.env.agentgym:alfworld": 0.8631578947368421, "success_rate.epoch.env.agentgym:sciworld": 0.954954954954955, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9177570093457944, "success_rate.epoch.env.math": 0.9742063492063492, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8517165814463111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631509658787746, "success_rate.epoch.global": 0.9077752553916004, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961890243902439, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.917910447761194, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08208955223880597 }, { "epoch": 2.3775031955688113, "grad_norm": 1052.336631436009, "learning_rate": 3.493199714042198e-07, "loss": 0.3742, "step": 11160, "success_rate.epoch.env.abd": 0.9870967741935484, "success_rate.epoch.env.agentgym:alfworld": 0.8645833333333334, "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9583333333333334, "success_rate.epoch.env.babyai": 0.9523809523809523, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9177570093457944, "success_rate.epoch.env.math": 0.9742574257425742, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8508005822416302, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632461014252097, "success_rate.epoch.global": 0.9074702886247877, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998630275974026, "tokens_p.mean_in_band": 0.3428955078125, "tokens_rate.above_band": 0.9871794871794872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01282051282051282 }, { "epoch": 2.3785683851725605, "grad_norm": 46.269544382856104, "learning_rate": 3.492870672422252e-07, "loss": 0.2332, "step": 11165, "success_rate.epoch.env.abd": 0.9870967741935484, "success_rate.epoch.env.agentgym:alfworld": 0.8645833333333334, "success_rate.epoch.env.agentgym:sciworld": 0.9553571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9520958083832335, "success_rate.epoch.env.logic": 0.9182156133828996, "success_rate.epoch.env.math": 0.9733464955577492, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.850909090909091, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635893678632534, "success_rate.epoch.global": 0.90744920993228, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989193925233645, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.9972041006523765, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0027958993476234857 }, { "epoch": 2.37963357477631, "grad_norm": 71.63228015035224, "learning_rate": 3.49254170887739e-07, "loss": 0.3182, "step": 11170, "success_rate.epoch.env.abd": 0.9870967741935484, "success_rate.epoch.env.agentgym:alfworld": 0.8645833333333334, "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9520958083832335, "success_rate.epoch.env.logic": 0.9183673469387755, "success_rate.epoch.env.math": 0.9733727810650887, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8515568428674873, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629381905193534, "success_rate.epoch.global": 0.9074282498593135, "success_rate.window.env.agentgym:sciworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964334239130435, "tokens_p.mean_in_band": 0.670654296875, "tokens_rate.above_band": 0.9787234042553191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02127659574468085 }, { "epoch": 2.3806987643800595, "grad_norm": 327.0478312384573, "learning_rate": 3.492212823647358e-07, "loss": 0.2949, "step": 11175, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.865979381443299, "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9183673469387755, "success_rate.epoch.env.math": 0.9724409448818898, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8512635379061372, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862994592634481, "success_rate.epoch.global": 0.9071268237934904, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983592487883683, "tokens_p.mean_in_band": 0.6180245535714286, "tokens_rate.above_band": 0.9943775100401606, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005622489959839358 }, { "epoch": 2.381763953983809, "grad_norm": 21.533846986080754, "learning_rate": 3.491884016971846e-07, "loss": 0.2154, "step": 11180, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8686868686868687, "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:textcraft": 0.96, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9191176470588235, "success_rate.epoch.env.math": 0.9724680432645034, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8507570295602018, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632653542163259, "success_rate.epoch.global": 0.9071068830442082, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992795389048992, "tokens_p.mean_in_band": 0.6514369419642857, "tokens_rate.above_band": 0.9900142653352354, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009985734664764621 }, { "epoch": 2.382829143587559, "grad_norm": 235.064921009993, "learning_rate": 3.4915552890904853e-07, "loss": 0.296, "step": 11185, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8686868686868687, "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9175824175824175, "success_rate.epoch.env.math": 0.97252208047105, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8505747126436781, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863253986248333, "success_rate.epoch.global": 0.9068080357142857, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9993580337490829, "tokens_p.mean_in_band": 0.5082347972972973, "tokens_rate.above_band": 0.9735714285714285, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02642857142857143 }, { "epoch": 2.383894333191308, "grad_norm": 132.30094566011968, "learning_rate": 3.491226640242851e-07, "loss": 0.48, "step": 11190, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8613861386138614, "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9175824175824175, "success_rate.epoch.env.math": 0.9725759059745348, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8512160228898427, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626534776734824, "success_rate.epoch.global": 0.9067890929326655, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982638888888888, "tokens_p.mean_in_band": 0.66552734375, "tokens_rate.above_band": 0.9878048780487805, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012195121951219513 }, { "epoch": 2.3849595227950573, "grad_norm": 119.03417947625366, "learning_rate": 3.490898070668459e-07, "loss": 0.2041, "step": 11195, "success_rate.epoch.env.abd": 0.9872611464968153, "success_rate.epoch.env.agentgym:alfworld": 0.8613861386138614, "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9177330895795247, "success_rate.epoch.env.math": 0.9727095516569201, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8509272467902995, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626530723625687, "success_rate.epoch.global": 0.9067702552719201, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9894662921348315, "tokens_p.mean_in_band": 0.695703125, "tokens_rate.above_band": 0.898989898989899, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10101010101010101 }, { "epoch": 2.386024712398807, "grad_norm": 304.03407850072375, "learning_rate": 3.4905695806067695e-07, "loss": 0.2916, "step": 11200, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, "success_rate.epoch.env.agentgym:sciworld": 0.9473684210526315, "success_rate.epoch.env.agentgym:textcraft": 0.9615384615384616, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9180327868852459, "success_rate.epoch.env.math": 0.9727891156462585, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8504273504273504, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8622084590291458, "success_rate.epoch.global": 0.9064748201438849, "success_rate.window.env.abd": 0.5, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957748724489796, "tokens_p.mean_below_band": 4.842877388000488e-07, "tokens_p.mean_in_band": 0.13498369235436894, "tokens_rate.above_band": 0.48635235732009924, "tokens_rate.below_band": 0.0024813895781637717, "tokens_rate.in_band": 0.511166253101737 }, { "epoch": 2.3870899020025567, "grad_norm": 237.75061366615947, "learning_rate": 3.4902411702971835e-07, "loss": 0.1495, "step": 11205, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9180327868852459, "success_rate.epoch.env.math": 0.972894482090997, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8506401137980085, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626484421645265, "success_rate.epoch.global": 0.9067328918322296, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982718894009217, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.388155091606306, "grad_norm": 148.02542978282412, "learning_rate": 3.4899128399790434e-07, "loss": 0.2462, "step": 11210, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9183303085299456, "success_rate.epoch.env.math": 0.9729468599033816, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8504606661941885, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8627804879041006, "success_rate.epoch.global": 0.906714364336819, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973299050632911, "tokens_p.mean_in_band": 0.6047585227272727, "tokens_rate.above_band": 0.9663608562691132, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03363914373088685 }, { "epoch": 2.389220281210055, "grad_norm": 464.4619204739061, "learning_rate": 3.489584589891635e-07, "loss": 0.143, "step": 11215, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9478260869565217, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9526627218934911, "success_rate.epoch.env.logic": 0.9187725631768953, "success_rate.epoch.env.math": 0.972972972972973, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8509887005649718, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628966853222699, "success_rate.epoch.global": 0.9069703622392975, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923502604166666, "tokens_p.mean_in_band": 0.86015625, "tokens_rate.above_band": 0.9746192893401016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025380710659898477 }, { "epoch": 2.390285470813805, "grad_norm": 637.8273901020881, "learning_rate": 3.489256420274183e-07, "loss": 0.3191, "step": 11220, "success_rate.epoch.env.abd": 0.9811320754716981, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.9171171171171171, "success_rate.epoch.env.math": 0.973051010587103, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8514084507042253, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628576463571015, "success_rate.epoch.global": 0.9069512862616311, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966692789968652, "tokens_p.mean_in_band": 0.6085069444444444, "tokens_rate.above_band": 0.9725609756097561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027439024390243903 }, { "epoch": 2.3913506604175545, "grad_norm": 64.07865439749847, "learning_rate": 3.488928331365857e-07, "loss": 0.2279, "step": 11225, "success_rate.epoch.env.abd": 0.98125, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9482758620689655, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9174147217235189, "success_rate.epoch.env.math": 0.9731285988483686, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8517217146872804, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629559717368689, "success_rate.epoch.global": 0.9072052401746725, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978553921568627, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9902912621359223, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009708737864077669 }, { "epoch": 2.3924158500213037, "grad_norm": 116.04393733212471, "learning_rate": 3.488600323405764e-07, "loss": 0.3599, "step": 11230, "success_rate.epoch.env.abd": 0.98125, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9532163742690059, "success_rate.epoch.env.logic": 0.9160714285714285, "success_rate.epoch.env.math": 0.9731543624161074, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8515406162464986, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628599225208553, "success_rate.epoch.global": 0.9069134458356015, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954459798994975, "tokens_p.mean_in_band": 0.625, "tokens_rate.above_band": 0.9476190476190476, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05238095238095238 }, { "epoch": 2.3934810396250534, "grad_norm": 181.36751715093163, "learning_rate": 3.4882723966329554e-07, "loss": 0.2478, "step": 11235, "success_rate.epoch.env.abd": 0.98125, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9160714285714285, "success_rate.epoch.env.math": 0.9732313575525813, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8514644351464435, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8628847235990768, "success_rate.epoch.global": 0.9068946796959826, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967311715481172, "tokens_p.mean_in_band": 0.7433035714285714, "tokens_rate.above_band": 0.9715447154471545, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028455284552845527 }, { "epoch": 2.3945462292288027, "grad_norm": 64.64369872626597, "learning_rate": 3.487944551286421e-07, "loss": 0.194, "step": 11240, "success_rate.epoch.env.abd": 0.9813664596273292, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.9487179487179487, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9168141592920354, "success_rate.epoch.env.math": 0.9733079122974261, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8514644351464435, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630841423925314, "success_rate.epoch.global": 0.9071467244179751, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995335820895522, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.3956114188325524, "grad_norm": 104.44353183729035, "learning_rate": 3.487616787605093e-07, "loss": 0.3379, "step": 11245, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9168141592920354, "success_rate.epoch.env.math": 0.9733333333333334, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8504867872044506, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631490241716787, "success_rate.epoch.global": 0.9068574514038877, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984008528784648, "tokens_p.mean_in_band": 0.5869766235351562, "tokens_rate.above_band": 0.989451476793249, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010548523206751054 }, { "epoch": 2.3966766084363016, "grad_norm": 116.84335950118749, "learning_rate": 3.487289105827842e-07, "loss": 0.2582, "step": 11250, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.8598130841121495, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.953757225433526, "success_rate.epoch.env.logic": 0.9169611307420494, "success_rate.epoch.env.math": 0.9734345351043643, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.8509015256588072, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624718848521936, "success_rate.epoch.global": 0.9068389876144318, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990202145214522, "tokens_p.mean_in_band": 0.7897135416666666, "tokens_rate.above_band": 0.9901960784313726, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00980392156862745 }, { "epoch": 2.3977417980400513, "grad_norm": 34.8890199872237, "learning_rate": 3.4869615061934806e-07, "loss": 0.1608, "step": 11255, "success_rate.epoch.env.abd": 0.9814814814814815, "success_rate.epoch.env.agentgym:alfworld": 0.8598130841121495, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9173989455184535, "success_rate.epoch.env.math": 0.9734848484848485, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.851313969571231, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8625779153196057, "success_rate.epoch.global": 0.9070891514500538, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9944968553459119, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9968652037617555, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003134796238244514 }, { "epoch": 2.3988069876438005, "grad_norm": 142.41336638126378, "learning_rate": 3.4866339889407614e-07, "loss": 0.2173, "step": 11260, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8598130841121495, "success_rate.epoch.env.agentgym:sciworld": 0.95, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9181184668989547, "success_rate.epoch.env.math": 0.9734848484848485, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8515193370165746, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8623794963272602, "success_rate.epoch.global": 0.9070701660417783, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996149289099526, "tokens_p.mean_in_band": 0.6040900735294118, "tokens_rate.above_band": 0.9254385964912281, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07456140350877193 }, { "epoch": 2.39987217724755, "grad_norm": 131.5516543860267, "learning_rate": 3.4863065543083766e-07, "loss": 0.2447, "step": 11265, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8598130841121495, "success_rate.epoch.env.agentgym:sciworld": 0.9504132231404959, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9181184668989547, "success_rate.epoch.env.math": 0.9735349716446124, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.852233676975945, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8624865587144993, "success_rate.epoch.global": 0.9073183760683761, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948369565217391, "tokens_p.mean_in_band": 0.765625, "tokens_rate.above_band": 0.9913793103448276, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008620689655172414 }, { "epoch": 2.4009373668512994, "grad_norm": 206.4952815685344, "learning_rate": 3.485979202534958e-07, "loss": 0.3351, "step": 11270, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9508196721311475, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9545454545454546, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9182608695652174, "success_rate.epoch.env.math": 0.9735599622285175, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8519533927347498, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8626787451725261, "success_rate.epoch.global": 0.9072988811933937, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990871659415786, "tokens_p.mean_in_band": 0.46337890625, "tokens_rate.above_band": 0.9950525664811379, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004947433518862091 }, { "epoch": 2.402002556455049, "grad_norm": 6.389346849192408, "learning_rate": 3.4856519338590766e-07, "loss": 0.1308, "step": 11275, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9508196721311475, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9182608695652174, "success_rate.epoch.env.math": 0.9736098020735156, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8525597269624573, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.862941405536093, "success_rate.epoch.global": 0.9075451647183846, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996875, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9982788296041308, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0017211703958691911 }, { "epoch": 2.4030677460587984, "grad_norm": 10.013904024918261, "learning_rate": 3.4853247485192444e-07, "loss": 0.1659, "step": 11280, "success_rate.epoch.env.abd": 0.9817073170731707, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9508196721311475, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9186851211072664, "success_rate.epoch.env.math": 0.9737089201877934, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8521798365122616, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8629544491002867, "success_rate.epoch.global": 0.9075251722310546, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959239130434783, "tokens_p.mean_below_band": 1.8533319234848022e-07, "tokens_p.mean_in_band": 0.7756696428571429, "tokens_rate.above_band": 0.8598130841121495, "tokens_rate.below_band": 0.009345794392523364, "tokens_rate.in_band": 0.1308411214953271 }, { "epoch": 2.404132935662548, "grad_norm": 146.73062001564242, "learning_rate": 3.484997646753912e-07, "loss": 0.2775, "step": 11285, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9512195121951219, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.918825561312608, "success_rate.epoch.env.math": 0.9738317757009346, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8523809523809524, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630430960453922, "success_rate.epoch.global": 0.90776955602537, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969093406593407, "tokens_p.mean_in_band": 0.7421875, "tokens_rate.above_band": 0.994535519125683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00546448087431694 }, { "epoch": 2.4051981252662973, "grad_norm": 210.47051997233478, "learning_rate": 3.484670628801468e-07, "loss": 0.3161, "step": 11290, "success_rate.epoch.env.abd": 0.9819277108433735, "success_rate.epoch.env.agentgym:alfworld": 0.8611111111111112, "success_rate.epoch.env.agentgym:sciworld": 0.9516129032258065, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.918825561312608, "success_rate.epoch.env.math": 0.9739776951672863, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8519021739130435, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630585561412393, "success_rate.epoch.global": 0.9077490774907749, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982638888888888, "tokens_p.mean_in_band": 0.6930803571428571, "tokens_rate.above_band": 0.9536423841059603, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046357615894039736 }, { "epoch": 2.406263314870047, "grad_norm": 47.26272492608127, "learning_rate": 3.4843436949002404e-07, "loss": 0.1687, "step": 11295, "success_rate.epoch.env.abd": 0.9820359281437125, "success_rate.epoch.env.agentgym:alfworld": 0.8623853211009175, "success_rate.epoch.env.agentgym:sciworld": 0.9516129032258065, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.918825561312608, "success_rate.epoch.env.math": 0.974025974025974, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.851150202976996, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8631202593424018, "success_rate.epoch.global": 0.907465825446898, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9990335051546392, "tokens_p.mean_below_band": 3.688037395477295e-07, "tokens_p.mean_in_band": 0.6334635416666666, "tokens_rate.above_band": 0.9822784810126582, "tokens_rate.below_band": 0.002531645569620253, "tokens_rate.in_band": 0.015189873417721518 }, { "epoch": 2.407328504473796, "grad_norm": 276.7905843791326, "learning_rate": 3.4840168452884967e-07, "loss": 0.1136, "step": 11300, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8623853211009175, "success_rate.epoch.env.agentgym:sciworld": 0.952, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.918825561312608, "success_rate.epoch.env.math": 0.9740980573543015, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.851652056641942, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863217346866639, "success_rate.epoch.global": 0.9077084425799685, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978885135135135, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.408393694077546, "grad_norm": 44.02478639331081, "learning_rate": 3.483690080204443e-07, "loss": 0.3458, "step": 11305, "success_rate.epoch.env.abd": 0.9822485207100592, "success_rate.epoch.env.agentgym:alfworld": 0.8623853211009175, "success_rate.epoch.env.agentgym:sciworld": 0.9523809523809523, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9189655172413793, "success_rate.epoch.env.math": 0.9742173112338858, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8511784511784511, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632420941660044, "success_rate.epoch.global": 0.9076882845188284, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0003824429277477, "tokens_p.mean_in_band": 0.66796875, "tokens_rate.above_band": 0.9981207423067888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0018792576932111817 }, { "epoch": 2.409458883681295, "grad_norm": 86.93008222328166, "learning_rate": 3.4833633998862235e-07, "loss": 0.2578, "step": 11310, "success_rate.epoch.env.abd": 0.9822485207100592, "success_rate.epoch.env.agentgym:alfworld": 0.8623853211009175, "success_rate.epoch.env.agentgym:sciworld": 0.953125, "success_rate.epoch.env.agentgym:textcraft": 0.9629629629629629, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9189655172413793, "success_rate.epoch.env.math": 0.9742883379247016, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8516778523489933, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633615919369504, "success_rate.epoch.global": 0.9079290558163797, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975066489361702, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.410524073285045, "grad_norm": 80.27174008216069, "learning_rate": 3.4830368045719193e-07, "loss": 0.116, "step": 11315, "success_rate.epoch.env.abd": 0.9822485207100592, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.953125, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9565217391304348, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9192439862542955, "success_rate.epoch.env.math": 0.9743119266055046, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8518766756032171, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637095892528315, "success_rate.epoch.global": 0.908168574401665, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992198043184886, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.411589262888794, "grad_norm": 77.62408344093436, "learning_rate": 3.482710294499552e-07, "loss": 0.1487, "step": 11320, "success_rate.epoch.env.abd": 0.9823529411764705, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9196581196581196, "success_rate.epoch.env.math": 0.9743354720439963, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.8518766756032171, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640009952612904, "success_rate.epoch.global": 0.9084068500259471, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.4126544524925437, "grad_norm": 82.86894299641722, "learning_rate": 3.482383869907078e-07, "loss": 0.2435, "step": 11325, "success_rate.epoch.env.abd": 0.9823529411764705, "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9197952218430034, "success_rate.epoch.env.math": 0.9744292237442922, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8521739130434782, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638809652137468, "success_rate.epoch.global": 0.9083850931677019, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980743838028169, "tokens_p.mean_in_band": 0.59765625, "tokens_rate.above_band": 0.9759450171821306, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024054982817869417 }, { "epoch": 2.413719642096293, "grad_norm": 68.71597285807619, "learning_rate": 3.4820575310323957e-07, "loss": 0.1064, "step": 11330, "success_rate.epoch.env.abd": 0.9826589595375722, "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9185059422750425, "success_rate.epoch.env.math": 0.9744990892531876, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8522727272727273, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638069123529544, "success_rate.epoch.global": 0.908363448631905, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0, "tokens_p.mean_in_band": 0.4632352941176471, "tokens_rate.above_band": 0.9733542319749217, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02664576802507837 }, { "epoch": 2.4147848317000427, "grad_norm": 108.17849516842011, "learning_rate": 3.4817312781133367e-07, "loss": 0.3906, "step": 11335, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9560439560439561, "success_rate.epoch.env.logic": 0.9187817258883249, "success_rate.epoch.env.math": 0.9745685740236149, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8506666666666667, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637013549829301, "success_rate.epoch.global": 0.907826982492276, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9941770186335404, "tokens_p.mean_in_band": 0.5744243421052632, "tokens_rate.above_band": 0.8944444444444445, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10555555555555556 }, { "epoch": 2.415850021303792, "grad_norm": 429.42557788297694, "learning_rate": 3.4814051113876723e-07, "loss": 0.2361, "step": 11340, "success_rate.epoch.env.abd": 0.9828571428571429, "success_rate.epoch.env.agentgym:alfworld": 0.8648648648648649, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9187817258883249, "success_rate.epoch.env.math": 0.9746146872166818, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8504983388704319, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637426359511845, "success_rate.epoch.global": 0.9078068823831535, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992732558139535, "tokens_p.mean_in_band": 0.5872395833333334, "tokens_rate.above_band": 0.9862385321100917, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013761467889908258 }, { "epoch": 2.4169152109075416, "grad_norm": 100.54038489364434, "learning_rate": 3.4810790310931106e-07, "loss": 0.2291, "step": 11345, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9534883720930233, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9187817258883249, "success_rate.epoch.env.math": 0.9746606334841629, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8495692511597084, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638110127230142, "success_rate.epoch.global": 0.907530737704918, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975190033783784, "tokens_p.mean_in_band": 0.4833984375, "tokens_rate.above_band": 0.9801324503311258, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019867549668874173 }, { "epoch": 2.417980400511291, "grad_norm": 293.76814639339, "learning_rate": 3.480753037467297e-07, "loss": 0.2855, "step": 11350, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9175084175084175, "success_rate.epoch.env.math": 0.974706413730804, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8498677248677249, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637590788437307, "success_rate.epoch.global": 0.9075114971895759, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980808597748209, "tokens_p.mean_in_band": 0.6023706896551724, "tokens_rate.above_band": 0.9711729622266402, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02882703777335984 }, { "epoch": 2.4190455901150405, "grad_norm": 83.88927403350766, "learning_rate": 3.480427130747812e-07, "loss": 0.1484, "step": 11355, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9567567567567568, "success_rate.epoch.env.logic": 0.9179229480737019, "success_rate.epoch.env.math": 0.9747292418772563, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8504611330698287, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638527849268072, "success_rate.epoch.global": 0.9077471967380224, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953571428571428, "tokens_p.mean_in_band": 0.7916666666666666, "tokens_rate.above_band": 0.9668508287292817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03314917127071823 }, { "epoch": 2.42011077971879, "grad_norm": 90.26815823858966, "learning_rate": 3.480101311172175e-07, "loss": 0.1995, "step": 11360, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9518716577540107, "success_rate.epoch.env.logic": 0.9183333333333333, "success_rate.epoch.env.math": 0.9747974797479748, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8506578947368421, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8634700836354088, "success_rate.epoch.global": 0.9077275038129131, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938478171334432, "tokens_p.mean_below_band": 1.1874362826347351e-08, "tokens_p.mean_in_band": 0.5233415233415234, "tokens_rate.above_band": 0.8992592592592593, "tokens_rate.below_band": 0.0002469135802469136, "tokens_rate.in_band": 0.10049382716049382 }, { "epoch": 2.4211759693225394, "grad_norm": 89.88018945924556, "learning_rate": 3.479775578977841e-07, "loss": 0.2932, "step": 11365, "success_rate.epoch.env.abd": 0.9831460674157303, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9518716577540107, "success_rate.epoch.env.logic": 0.9183333333333333, "success_rate.epoch.env.math": 0.974910394265233, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8510498687664042, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635246389540381, "success_rate.epoch.global": 0.9079614604462475, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9899193548387096, "tokens_p.mean_in_band": 0.7555803571428571, "tokens_rate.above_band": 0.9465648854961832, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05343511450381679 }, { "epoch": 2.4222411589262887, "grad_norm": 30.385510202584697, "learning_rate": 3.4794499344022005e-07, "loss": 0.1685, "step": 11370, "success_rate.epoch.env.abd": 0.9831460674157303, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.9183333333333333, "success_rate.epoch.env.math": 0.9750445632798574, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8513425016371972, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635867120182085, "success_rate.epoch.global": 0.9081942336874052, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985677083333333, "tokens_p.mean_in_band": 0.5966796875, "tokens_rate.above_band": 0.9836065573770492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01639344262295082 }, { "epoch": 2.4233063485300383, "grad_norm": 42.159334060110666, "learning_rate": 3.4791243776825814e-07, "loss": 0.2069, "step": 11375, "success_rate.epoch.env.abd": 0.9832402234636871, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9538461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9183333333333333, "success_rate.epoch.env.math": 0.9751111111111112, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8511749347258486, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636091147249673, "success_rate.epoch.global": 0.9081735620585267, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945175438596491, "tokens_p.mean_in_band": 0.4521484375, "tokens_rate.above_band": 0.9344262295081968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06557377049180328 }, { "epoch": 2.424371538133788, "grad_norm": 35.188423877498934, "learning_rate": 3.478798909056248e-07, "loss": 0.1762, "step": 11380, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9172185430463576, "success_rate.epoch.env.math": 0.9751332149200711, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8513689700130378, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635906969687422, "success_rate.epoch.global": 0.9081529944640161, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974755700325733, "tokens_p.mean_in_band": 0.737109375, "tokens_rate.above_band": 0.9935275080906149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006472491909385114 }, { "epoch": 2.4254367277375373, "grad_norm": 233.357671967341, "learning_rate": 3.4784735287603987e-07, "loss": 0.2373, "step": 11385, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9172185430463576, "success_rate.epoch.env.math": 0.975177304964539, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8521400778210116, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636648058644187, "success_rate.epoch.global": 0.9083835341365462, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9930862831858407, "tokens_p.mean_in_band": 0.8037109375, "tokens_rate.above_band": 0.9338842975206612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06611570247933884 }, { "epoch": 2.4265019173412865, "grad_norm": 85.55642516680025, "learning_rate": 3.4781482370321677e-07, "loss": 0.2777, "step": 11390, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9642857142857143, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.9174917491749175, "success_rate.epoch.env.math": 0.9752431476569408, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8526179702650291, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637390732521442, "success_rate.epoch.global": 0.9086129193790686, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919117647058824, "tokens_p.mean_in_band": 0.8015625, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 2.427567106945036, "grad_norm": 302.58357906753685, "learning_rate": 3.477823034108627e-07, "loss": 0.1959, "step": 11395, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9528795811518325, "success_rate.epoch.env.logic": 0.9180327868852459, "success_rate.epoch.env.math": 0.9753086419753086, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8527131782945736, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639373704663533, "success_rate.epoch.global": 0.9088411588411588, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990646258503402, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.428632296548786, "grad_norm": 86.80230576270047, "learning_rate": 3.47749792022678e-07, "loss": 0.2918, "step": 11400, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8660714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9181669394435352, "success_rate.epoch.env.math": 0.9753954305799648, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8518041237113402, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638971255052514, "success_rate.epoch.global": 0.908570004982561, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968245967741935, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.9810126582278481, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0189873417721519 }, { "epoch": 2.429697486152535, "grad_norm": 194.61855717702295, "learning_rate": 3.477172895623569e-07, "loss": 0.2382, "step": 11405, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9183006535947712, "success_rate.epoch.env.math": 0.9754601226993865, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8521850899742931, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640575419716445, "success_rate.epoch.global": 0.9087972166998012, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981449771689498, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9984802431610942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001519756838905775 }, { "epoch": 2.430762675756285, "grad_norm": 361.56079632673146, "learning_rate": 3.4768479605358683e-07, "loss": 0.2162, "step": 11410, "success_rate.epoch.env.abd": 0.9834254143646409, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9533678756476683, "success_rate.epoch.env.logic": 0.9185667752442996, "success_rate.epoch.env.math": 0.9746724890829694, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8523748395378691, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640578323603413, "success_rate.epoch.global": 0.9087754090233019, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978618421052632, "tokens_p.mean_in_band": 0.29296875, "tokens_rate.above_band": 0.9947643979057592, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005235602094240838 }, { "epoch": 2.431827865360034, "grad_norm": 103.97261188741398, "learning_rate": 3.47652311520049e-07, "loss": 0.3553, "step": 11415, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9536082474226805, "success_rate.epoch.env.logic": 0.9186991869918699, "success_rate.epoch.env.math": 0.974716652136007, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8514724711907811, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640301706549721, "success_rate.epoch.global": 0.9085064292779427, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967320261437909, "tokens_p.mean_in_band": 0.6657366071428571, "tokens_rate.above_band": 0.9954456733897202, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004554326610279766 }, { "epoch": 2.4328930549637837, "grad_norm": 65.81892410091692, "learning_rate": 3.476198359854177e-07, "loss": 0.3064, "step": 11420, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9541984732824428, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9189627228525121, "success_rate.epoch.env.math": 0.9747826086956521, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8517571884984025, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8641290429239075, "success_rate.epoch.global": 0.908732116428219, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997360641891891, "tokens_p.mean_in_band": 0.703125, "tokens_rate.above_band": 0.9983136593591906, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016863406408094434 }, { "epoch": 2.433958244567533, "grad_norm": 127.67852529289556, "learning_rate": 3.47587369473361e-07, "loss": 0.099, "step": 11425, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.919093851132686, "success_rate.epoch.env.math": 0.9748263888888888, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8523233609166136, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642279576652379, "success_rate.epoch.global": 0.9089566929133859, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.993646978021978, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9891304347826086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010869565217391304 }, { "epoch": 2.4350234341712826, "grad_norm": 85.56016805031243, "learning_rate": 3.475549120075402e-07, "loss": 0.1275, "step": 11430, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9193548387096774, "success_rate.epoch.env.math": 0.9749134948096886, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8526984126984127, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642936981452006, "success_rate.epoch.global": 0.9091801669121257, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.8141741071428571, "tokens_rate.above_band": 0.9278350515463918, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07216494845360824 }, { "epoch": 2.436088623775032, "grad_norm": 92.90690599262439, "learning_rate": 3.4752246361161016e-07, "loss": 0.2782, "step": 11435, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9540816326530612, "success_rate.epoch.env.logic": 0.9193548387096774, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8532574320050601, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8643523821903787, "success_rate.epoch.global": 0.9094025465230167, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.993287037037037, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9854014598540146, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014598540145985401 }, { "epoch": 2.4371538133787816, "grad_norm": 143.73509558216423, "learning_rate": 3.47490024309219e-07, "loss": 0.3133, "step": 11440, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9545454545454546, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.9196141479099679, "success_rate.epoch.env.math": 0.9750215331610681, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8538122243226213, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644495388120188, "success_rate.epoch.global": 0.9096238397655105, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992843511450382, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.438219002982531, "grad_norm": 259.5958069642486, "learning_rate": 3.4745759412400813e-07, "loss": 0.3571, "step": 11445, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.92, "success_rate.epoch.env.math": 0.9750644883920895, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8535512256442489, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644958635522314, "success_rate.epoch.global": 0.9096003898635477, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962225274725275, "tokens_p.mean_in_band": 0.6966145833333334, "tokens_rate.above_band": 0.9238578680203046, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07614213197969544 }, { "epoch": 2.4392841925862805, "grad_norm": 99.21318413765184, "learning_rate": 3.4742517307961257e-07, "loss": 0.4552, "step": 11450, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8672566371681416, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9655172413793104, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9543147208121827, "success_rate.epoch.env.logic": 0.9202551834130781, "success_rate.epoch.env.math": 0.9751499571550986, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8532915360501567, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8645032237869583, "success_rate.epoch.global": 0.9095770539620807, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950657894736842, "tokens_p.mean_in_band": 0.6218039772727273, "tokens_rate.above_band": 0.8962264150943396, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10377358490566038 }, { "epoch": 2.4403493821900297, "grad_norm": 37.63868455620462, "learning_rate": 3.473927611996605e-07, "loss": 0.262, "step": 11455, "success_rate.epoch.env.abd": 0.9837837837837838, "success_rate.epoch.env.agentgym:alfworld": 0.8596491228070176, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9203821656050956, "success_rate.epoch.env.math": 0.9751499571550986, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8536585365853658, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639981195863641, "success_rate.epoch.global": 0.9095538312318138, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995216836734694, "tokens_p.mean_in_band": 0.4375, "tokens_rate.above_band": 0.9987261146496815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012738853503184713 }, { "epoch": 2.4414145717937794, "grad_norm": 505.1513087001165, "learning_rate": 3.4736035850777347e-07, "loss": 0.2379, "step": 11460, "success_rate.epoch.env.abd": 0.9837837837837838, "success_rate.epoch.env.agentgym:alfworld": 0.8596491228070176, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.919047619047619, "success_rate.epoch.env.math": 0.9752136752136752, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8533998752339363, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638798400364505, "success_rate.epoch.global": 0.909288824383164, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0002132444168734, "tokens_p.mean_in_band": 0.5558449074074074, "tokens_rate.above_band": 0.9835265405735204, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01647345942647956 }, { "epoch": 2.4424797613975286, "grad_norm": 154.4784904410716, "learning_rate": 3.4732796502756637e-07, "loss": 0.2254, "step": 11465, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8596491228070176, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9177215189873418, "success_rate.epoch.env.math": 0.9752559726962458, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8538557213930348, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638124970855, "success_rate.epoch.global": 0.9092664092664092, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964345637583892, "tokens_p.mean_in_band": 0.54296875, "tokens_rate.above_band": 0.962843295638126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03715670436187399 }, { "epoch": 2.4435449510012783, "grad_norm": 82.59548213976454, "learning_rate": 3.472955807826472e-07, "loss": 0.1186, "step": 11470, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8596491228070176, "success_rate.epoch.env.agentgym:sciworld": 0.9548872180451128, "success_rate.epoch.env.agentgym:textcraft": 0.9666666666666667, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9178515007898894, "success_rate.epoch.env.math": 0.9753191489361702, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8543990086741016, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638794466603671, "success_rate.epoch.global": 0.909484833895041, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9909274193548387, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.4446101406050276, "grad_norm": 211.67444853530813, "learning_rate": 3.472632057966176e-07, "loss": 0.2402, "step": 11475, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.853448275862069, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.9178515007898894, "success_rate.epoch.env.math": 0.9753610875106202, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8547589616810878, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8634806262972393, "success_rate.epoch.global": 0.909462055715658, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997656955736224, "tokens_p.mean_in_band": 0.572265625, "tokens_rate.above_band": 0.9981965734896303, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0018034265103697023 }, { "epoch": 2.4456753302087773, "grad_norm": 103.98837166252731, "learning_rate": 3.47230840093072e-07, "loss": 0.3586, "step": 11480, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8547008547008547, "success_rate.epoch.env.agentgym:sciworld": 0.9552238805970149, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9547738693467337, "success_rate.epoch.env.logic": 0.917981072555205, "success_rate.epoch.env.math": 0.9754445385266723, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8551171393341553, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636464244129866, "success_rate.epoch.global": 0.9096789650215621, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.8528645833333334, "tokens_rate.above_band": 0.9810126582278481, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0189873417721519 }, { "epoch": 2.4467405198125265, "grad_norm": 78.73909494746351, "learning_rate": 3.471984836955983e-07, "loss": 0.2572, "step": 11485, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8547008547008547, "success_rate.epoch.env.agentgym:sciworld": 0.9558823529411765, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.9583333333333334, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.917981072555205, "success_rate.epoch.env.math": 0.9755067567567568, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8547692307692307, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637212238002232, "success_rate.epoch.global": 0.9096558317399618, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961622807017544, "tokens_p.mean_in_band": 0.5792410714285714, "tokens_rate.above_band": 0.9661016949152542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03389830508474576 }, { "epoch": 2.447805709416276, "grad_norm": 152.68207496662538, "learning_rate": 3.4716613662777785e-07, "loss": 0.2705, "step": 11490, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8547008547008547, "success_rate.epoch.env.agentgym:sciworld": 0.9568345323741008, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9181102362204724, "success_rate.epoch.env.math": 0.975548060708263, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.855036855036855, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639991272533133, "success_rate.epoch.global": 0.9098712446351931, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982221006564551, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9978165938864629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002183406113537118 }, { "epoch": 2.4488708990200254, "grad_norm": 56.7599018824399, "learning_rate": 3.471337989131847e-07, "loss": 0.2709, "step": 11495, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.8547008547008547, "success_rate.epoch.env.agentgym:sciworld": 0.9568345323741008, "success_rate.epoch.env.agentgym:textcraft": 0.967741935483871, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9184952978056427, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8553034947884733, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640718225474483, "success_rate.epoch.global": 0.9100856327307326, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9942129629629629, "tokens_p.mean_in_band": 0.7447916666666666, "tokens_rate.above_band": 0.9642857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03571428571428571 }, { "epoch": 2.449936088623775, "grad_norm": 113.19760536961066, "learning_rate": 3.471014705753865e-07, "loss": 0.2691, "step": 11500, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.8559322033898306, "success_rate.epoch.env.agentgym:sciworld": 0.9568345323741008, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.918622848200313, "success_rate.epoch.env.math": 0.9756711409395973, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.854434250764526, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642135593127087, "success_rate.epoch.global": 0.9098243948742287, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977176966292135, "tokens_p.mean_in_band": 0.33735795454545453, "tokens_rate.above_band": 0.9798165137614679, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02018348623853211 }, { "epoch": 2.4510012782275243, "grad_norm": 24.636930932812355, "learning_rate": 3.4706915163794375e-07, "loss": 0.1825, "step": 11505, "success_rate.epoch.env.abd": 0.983957219251337, "success_rate.epoch.env.agentgym:alfworld": 0.8559322033898306, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9188767550702028, "success_rate.epoch.env.math": 0.975752508361204, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8540012217470984, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.864252853312733, "success_rate.epoch.global": 0.9098011363636364, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980736301369864, "tokens_p.mean_in_band": 0.2848557692307692, "tokens_rate.above_band": 0.9573770491803278, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04262295081967213 }, { "epoch": 2.452066467831274, "grad_norm": 153.46031226260934, "learning_rate": 3.470368421244105e-07, "loss": 0.2244, "step": 11510, "success_rate.epoch.env.abd": 0.9842105263157894, "success_rate.epoch.env.agentgym:alfworld": 0.8487394957983193, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9190031152647975, "success_rate.epoch.env.math": 0.975752508361204, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.853836784409257, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863618537160887, "success_rate.epoch.global": 0.9095418044402457, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988407258064517, "tokens_p.mean_in_band": 0.5881696428571429, "tokens_rate.above_band": 0.9779179810725552, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022082018927444796 }, { "epoch": 2.4531316574350233, "grad_norm": 50.78545166643288, "learning_rate": 3.4700454205833345e-07, "loss": 0.1686, "step": 11515, "success_rate.epoch.env.abd": 0.9842931937172775, "success_rate.epoch.env.agentgym:alfworld": 0.8487394957983193, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9191290824261276, "success_rate.epoch.env.math": 0.9758333333333333, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8541919805589308, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636771422231252, "success_rate.epoch.global": 0.9097549481621112, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967873831775701, "tokens_p.mean_in_band": 0.8463541666666666, "tokens_rate.above_band": 0.9727272727272728, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02727272727272727 }, { "epoch": 2.454196847038773, "grad_norm": 235.36057636835326, "learning_rate": 3.469722514632528e-07, "loss": 0.3356, "step": 11520, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9193798449612403, "success_rate.epoch.env.math": 0.9758735440931781, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8539393939393939, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8638026601467456, "success_rate.epoch.global": 0.9097320169252469, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998766447368421, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.979381443298969, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020618556701030927 }, { "epoch": 2.455262036642522, "grad_norm": 73.33270362818654, "learning_rate": 3.469399703627017e-07, "loss": 0.2989, "step": 11525, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.96875, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9554455445544554, "success_rate.epoch.env.logic": 0.9197530864197531, "success_rate.epoch.env.math": 0.9751243781094527, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.853599516031458, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637375871982775, "success_rate.epoch.global": 0.9094746716697936, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8055555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980357142857142, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.9562841530054644, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04371584699453552 }, { "epoch": 2.456327226246272, "grad_norm": 54.47932816134175, "learning_rate": 3.4690769878020627e-07, "loss": 0.1991, "step": 11530, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9571428571428572, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.9202453987730062, "success_rate.epoch.env.math": 0.9751449875724938, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8537764350453172, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863926098008627, "success_rate.epoch.global": 0.9096864763687412, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0001041666666666, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.4573924158500215, "grad_norm": 89.9965072813347, "learning_rate": 3.4687543673928586e-07, "loss": 0.2778, "step": 11535, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9203675344563553, "success_rate.epoch.env.math": 0.9751449875724938, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8543046357615894, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636104839637032, "success_rate.epoch.global": 0.9096638655462185, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916038074712644, "tokens_p.mean_in_band": 0.6220118087557603, "tokens_rate.above_band": 0.8651336233685519, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1348663766314481 }, { "epoch": 2.458457605453771, "grad_norm": 440.90524424875747, "learning_rate": 3.4684318426345293e-07, "loss": 0.3518, "step": 11540, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9203675344563553, "success_rate.epoch.env.math": 0.9751861042183623, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8538046734571599, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863568770722016, "success_rate.epoch.global": 0.9094084769445738, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9866763565891473, "tokens_p.mean_in_band": 0.7350643382352942, "tokens_rate.above_band": 0.8835616438356164, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11643835616438356 }, { "epoch": 2.45952279505752, "grad_norm": 73.24932544978624, "learning_rate": 3.4681094137621275e-07, "loss": 0.1991, "step": 11545, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9208523592085236, "success_rate.epoch.env.math": 0.9752475247524752, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8534688995215312, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635879044811662, "success_rate.epoch.global": 0.9093866171003717, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9932324840764332, "tokens_p.mean_in_band": 0.673828125, "tokens_rate.above_band": 0.9289940828402367, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07100591715976332 }, { "epoch": 2.4605879846612697, "grad_norm": 52.70768315769327, "learning_rate": 3.4677870810106364e-07, "loss": 0.1735, "step": 11550, "success_rate.epoch.env.abd": 0.9844559585492227, "success_rate.epoch.env.agentgym:alfworld": 0.8512396694214877, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9209726443768997, "success_rate.epoch.env.math": 0.9752883031301482, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.85381861575179, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8637543960945312, "success_rate.epoch.global": 0.9095966620305981, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997163955479452, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.4616531742650194, "grad_norm": 254.35518799634457, "learning_rate": 3.467464844614972e-07, "loss": 0.1413, "step": 11555, "success_rate.epoch.env.abd": 0.9845360824742269, "success_rate.epoch.env.agentgym:alfworld": 0.8524590163934426, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9514563106796117, "success_rate.epoch.env.logic": 0.9209726443768997, "success_rate.epoch.env.math": 0.9753492193919474, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8542534205829864, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639175953663454, "success_rate.epoch.global": 0.9098057354301573, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985795454545454, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9977324263038548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0022675736961451248 }, { "epoch": 2.4627183638687686, "grad_norm": 159.21691195554962, "learning_rate": 3.467142704809975e-07, "loss": 0.3126, "step": 11560, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8524590163934426, "success_rate.epoch.env.agentgym:sciworld": 0.9574468085106383, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9209726443768997, "success_rate.epoch.env.math": 0.9753896636587367, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8547717842323651, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8639969245148684, "success_rate.epoch.global": 0.9100138440239963, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984038978494624, "tokens_p.mean_in_band": 0.77734375, "tokens_rate.above_band": 0.9973190348525469, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002680965147453083 }, { "epoch": 2.463783553472518, "grad_norm": 71.30557236258782, "learning_rate": 3.466820661830421e-07, "loss": 0.2282, "step": 11565, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8524590163934426, "success_rate.epoch.env.agentgym:sciworld": 0.958041958041958, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.921092564491654, "success_rate.epoch.env.math": 0.9754901960784313, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8549437537004144, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8640867037452157, "success_rate.epoch.global": 0.9102209944751382, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984529702970297, "tokens_p.mean_in_band": 0.69140625, "tokens_rate.above_band": 0.9805825242718447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019417475728155338 }, { "epoch": 2.4648487430762676, "grad_norm": 82.9666043908078, "learning_rate": 3.466498715911011e-07, "loss": 0.1536, "step": 11570, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.8524590163934426, "success_rate.epoch.env.agentgym:sciworld": 0.9583333333333334, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9516908212560387, "success_rate.epoch.env.logic": 0.9213313161875946, "success_rate.epoch.env.math": 0.9755501222493888, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8552009456264775, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8641708617470311, "success_rate.epoch.global": 0.9104271933853928, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975369458128078, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9950980392156863, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004901960784313725 }, { "epoch": 2.4659139326800172, "grad_norm": 32.05132161979001, "learning_rate": 3.4661768672863774e-07, "loss": 0.1857, "step": 11575, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.8536585365853658, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9519230769230769, "success_rate.epoch.env.logic": 0.9201807228915663, "success_rate.epoch.env.math": 0.9755700325732899, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.855457227138643, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642476554489256, "success_rate.epoch.global": 0.9104032997250229, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985320686540199, "tokens_p.mean_in_band": 0.49107142857142855, "tokens_rate.above_band": 0.9875111507582516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012488849241748439 }, { "epoch": 2.4669791222837665, "grad_norm": 39.7553168130122, "learning_rate": 3.4658551161910814e-07, "loss": 0.1681, "step": 11580, "success_rate.epoch.env.abd": 0.9846938775510204, "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9519230769230769, "success_rate.epoch.env.logic": 0.9201807228915663, "success_rate.epoch.env.math": 0.9756493506493507, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8552941176470589, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8643473265104281, "success_rate.epoch.global": 0.9103795153177869, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996510152284264, "tokens_p.mean_in_band": 0.7546875, "tokens_rate.above_band": 0.9516908212560387, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04830917874396135 }, { "epoch": 2.468044311887516, "grad_norm": 27.228230464070002, "learning_rate": 3.465533462859613e-07, "loss": 0.1176, "step": 11585, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.95260663507109, "success_rate.epoch.env.logic": 0.9204204204204204, "success_rate.epoch.env.math": 0.975669099756691, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8555490311215502, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644632914478592, "success_rate.epoch.global": 0.9105839416058394, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999467178175618, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.4691095014912654, "grad_norm": 44.531969535206905, "learning_rate": 3.4652119075263905e-07, "loss": 0.2738, "step": 11590, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.95260663507109, "success_rate.epoch.env.logic": 0.9204204204204204, "success_rate.epoch.env.math": 0.9749596122778675, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8559718969555035, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.864437234934689, "success_rate.epoch.global": 0.9105598543468366, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9913793103448276, "tokens_p.mean_in_band": 0.5, "tokens_rate.above_band": 0.9775280898876404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02247191011235955 }, { "epoch": 2.470174691095015, "grad_norm": 124.58083391346277, "learning_rate": 3.4648904504257606e-07, "loss": 0.3096, "step": 11595, "success_rate.epoch.env.abd": 0.9847715736040609, "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9210134128166915, "success_rate.epoch.env.math": 0.9749596122778675, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8557242990654206, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644889574905386, "success_rate.epoch.global": 0.9105358764759309, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971264367816092, "tokens_p.mean_in_band": 0.5404459635416666, "tokens_rate.above_band": 0.9886363636363636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011363636363636364 }, { "epoch": 2.4712398806987643, "grad_norm": 89.69715584799415, "learning_rate": 3.464569091792e-07, "loss": 0.2962, "step": 11600, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9211309523809523, "success_rate.epoch.env.math": 0.9750402576489533, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.8548951048951049, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8644385849459436, "success_rate.epoch.global": 0.9102854553692795, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9929775280898876, "tokens_p.mean_in_band": 0.400146484375, "tokens_rate.above_band": 0.8476190476190476, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1523809523809524 }, { "epoch": 2.472305070302514, "grad_norm": 48.56348724429622, "learning_rate": 3.4642478318593124e-07, "loss": 0.206, "step": 11605, "success_rate.epoch.env.abd": 0.985, "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9528301886792453, "success_rate.epoch.env.logic": 0.9212481426448736, "success_rate.epoch.env.math": 0.9750603378921963, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.8547356188262638, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8641913391951104, "success_rate.epoch.global": 0.9100361663652803, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.996625, "tokens_p.mean_in_band": 0.6397372159090909, "tokens_rate.above_band": 0.9191176470588235, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08088235294117647 }, { "epoch": 2.4733702599062632, "grad_norm": 67.10807693520958, "learning_rate": 3.463926670861829e-07, "loss": 0.1941, "step": 11610, "success_rate.epoch.env.abd": 0.985, "success_rate.epoch.env.agentgym:alfworld": 0.8548387096774194, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9213649851632048, "success_rate.epoch.env.math": 0.9751004016064257, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.8546612623045744, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8642189759427573, "success_rate.epoch.global": 0.9100135317997293, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977134146341463, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9879518072289156, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012048192771084338 }, { "epoch": 2.474435449510013, "grad_norm": 129.39596683898907, "learning_rate": 3.4636056090336096e-07, "loss": 0.1696, "step": 11615, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9201183431952663, "success_rate.epoch.env.math": 0.9751602564102564, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.8548293811451706, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8636211212219472, "success_rate.epoch.global": 0.9097659765976598, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984088594704684, "tokens_p.mean_in_band": 0.6881167763157895, "tokens_rate.above_band": 0.9627450980392157, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03725490196078431 }, { "epoch": 2.475500639113762, "grad_norm": 33.13922157173911, "learning_rate": 3.4632846466086426e-07, "loss": 0.2553, "step": 11620, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9189985272459499, "success_rate.epoch.env.math": 0.9752198241406874, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.8545874206578188, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8635027386122892, "success_rate.epoch.global": 0.9095195330040413, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8055555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9956530139103554, "tokens_p.mean_in_band": 0.45682565789473684, "tokens_rate.above_band": 0.9445255474452555, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05547445255474453 }, { "epoch": 2.476565828717512, "grad_norm": 43.805918696932984, "learning_rate": 3.462963783820843e-07, "loss": 0.1684, "step": 11625, "success_rate.epoch.env.abd": 0.9851485148514851, "success_rate.epoch.env.agentgym:alfworld": 0.8492063492063492, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9530516431924883, "success_rate.epoch.env.logic": 0.9191176470588235, "success_rate.epoch.env.math": 0.9752593774940144, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8544303797468354, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632691038482462, "success_rate.epoch.global": 0.9092741935483871, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968327702702703, "tokens_p.mean_in_band": 0.709375, "tokens_rate.above_band": 0.8809523809523809, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11904761904761904 }, { "epoch": 2.477631018321261, "grad_norm": 17.613045496522837, "learning_rate": 3.462643020904052e-07, "loss": 0.1682, "step": 11630, "success_rate.epoch.env.abd": 0.9851485148514851, "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9193548387096774, "success_rate.epoch.env.math": 0.9753184713375797, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8545977011494252, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8634391350594243, "success_rate.epoch.global": 0.9094769780956639, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0015871278458843, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.4786962079250108, "grad_norm": 106.20804606607898, "learning_rate": 3.4623223580920414e-07, "loss": 0.2946, "step": 11635, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9194729136163983, "success_rate.epoch.env.math": 0.9753772835583797, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8544412607449857, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8634476447638282, "success_rate.epoch.global": 0.9094558429973238, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950810185185185, "tokens_p.mean_in_band": 0.5357730263157895, "tokens_rate.above_band": 0.8503937007874016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14960629921259844 }, { "epoch": 2.47976139752876, "grad_norm": 243.35404457537976, "learning_rate": 3.462001795618507e-07, "loss": 0.3275, "step": 11640, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8503937007874016, "success_rate.epoch.env.agentgym:sciworld": 0.9586206896551724, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9490740740740741, "success_rate.epoch.env.logic": 0.9194729136163983, "success_rate.epoch.env.math": 0.9754552652414885, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8542024013722127, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8630514782499215, "success_rate.epoch.global": 0.9092122830440588, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9901818813716404, "tokens_p.mean_in_band": 0.5979263630319149, "tokens_rate.above_band": 0.8516179952644041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1483820047355959 }, { "epoch": 2.4808265871325097, "grad_norm": 37.921405900959556, "learning_rate": 3.461681333717071e-07, "loss": 0.1655, "step": 11645, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.958904109589041, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9493087557603687, "success_rate.epoch.env.logic": 0.9198250728862973, "success_rate.epoch.env.math": 0.9754940711462451, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8542857142857143, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.863247949060914, "success_rate.epoch.global": 0.9094138543516874, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999754259501966, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.481891776736259, "grad_norm": 149.74655241448744, "learning_rate": 3.461360972621286e-07, "loss": 0.3679, "step": 11650, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, "success_rate.epoch.env.agentgym:textcraft": 0.9696969696969697, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9493087557603687, "success_rate.epoch.env.logic": 0.9188405797101449, "success_rate.epoch.env.math": 0.9755134281200631, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.854618015963512, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632158335478057, "success_rate.epoch.global": 0.909392999556934, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957729468599034, "tokens_p.mean_in_band": 0.7159598214285714, "tokens_rate.above_band": 0.9366515837104072, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06334841628959276 }, { "epoch": 2.4829569663400086, "grad_norm": 121.2565545041361, "learning_rate": 3.461040712564628e-07, "loss": 0.202, "step": 11655, "success_rate.epoch.env.abd": 0.9852216748768473, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9591836734693877, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9495412844036697, "success_rate.epoch.env.logic": 0.9190751445086706, "success_rate.epoch.env.math": 0.9755713159968479, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8547835990888383, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633596362788136, "success_rate.epoch.global": 0.9095732920627901, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962071240105541, "tokens_p.mean_in_band": 0.7484375, "tokens_rate.above_band": 0.9869791666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013020833333333334 }, { "epoch": 2.484022155943758, "grad_norm": 160.30270076728354, "learning_rate": 3.4607205537804993e-07, "loss": 0.2054, "step": 11660, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9495412844036697, "success_rate.epoch.env.logic": 0.9191919191919192, "success_rate.epoch.env.math": 0.9748427672955975, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8545454545454545, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8633140281150772, "success_rate.epoch.global": 0.9093315684976837, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8833333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99609375, "tokens_p.mean_in_band": 0.6497395833333334, "tokens_rate.above_band": 0.9440993788819876, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055900621118012424 }, { "epoch": 2.4850873455475075, "grad_norm": 74.7418241140314, "learning_rate": 3.46040049650223e-07, "loss": 0.2437, "step": 11665, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9497716894977168, "success_rate.epoch.env.logic": 0.9193083573487032, "success_rate.epoch.env.math": 0.9749412685982772, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.8536585365853658, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8632738850781153, "success_rate.epoch.global": 0.9090909090909091, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9966313073394495, "tokens_p.mean_in_band": 0.6186079545454546, "tokens_rate.above_band": 0.9753914988814317, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024608501118568233 }, { "epoch": 2.486152535151257, "grad_norm": 76.71743606210104, "learning_rate": 3.4600805409630764e-07, "loss": 0.2261, "step": 11670, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9183381088825215, "success_rate.epoch.env.math": 0.9749804534792806, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8537414965986394, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661349439304203, "success_rate.epoch.global": 0.9090709422358885, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000458211143695, "tokens_p.mean_in_band": 0.4696044921875, "tokens_rate.above_band": 0.9696682464454977, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03033175355450237 }, { "epoch": 2.4872177247550065, "grad_norm": 186.86214227877957, "learning_rate": 3.459760687396219e-07, "loss": 0.2013, "step": 11675, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9504504504504504, "success_rate.epoch.env.logic": 0.9186875891583453, "success_rate.epoch.env.math": 0.9750195160031225, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.853423882286361, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661823420156786, "success_rate.epoch.global": 0.9090510628972167, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984012619372442, "tokens_p.mean_in_band": 0.615625, "tokens_rate.above_band": 0.9966009517335146, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003399048266485384 }, { "epoch": 2.4882829143587557, "grad_norm": 15.866613156525055, "learning_rate": 3.4594409360347647e-07, "loss": 0.1673, "step": 11680, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.9594594594594594, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9504504504504504, "success_rate.epoch.env.logic": 0.9190340909090909, "success_rate.epoch.env.math": 0.9751166407465007, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.853024307518372, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661863467180542, "success_rate.epoch.global": 0.9090312705007654, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916930379746836, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.9634146341463414, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036585365853658534 }, { "epoch": 2.4893481039625054, "grad_norm": 75.41214746544202, "learning_rate": 3.459121287111746e-07, "loss": 0.235, "step": 11685, "success_rate.epoch.env.abd": 0.9853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9192634560906515, "success_rate.epoch.env.math": 0.9751359751359752, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8532731376975169, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8663163716794672, "success_rate.epoch.global": 0.909229762164521, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0007974952741021, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9995276334435522, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0004723665564478035 }, { "epoch": 2.4904132935662546, "grad_norm": 60.60112702534587, "learning_rate": 3.458801740860121e-07, "loss": 0.3327, "step": 11690, "success_rate.epoch.env.abd": 0.9854368932038835, "success_rate.epoch.env.agentgym:alfworld": 0.8515625, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9192634560906515, "success_rate.epoch.env.math": 0.9751745539177658, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8532883642495784, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8663277212139399, "success_rate.epoch.global": 0.9092096668843893, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9923155737704918, "tokens_p.mean_in_band": 0.74140625, "tokens_rate.above_band": 0.9606299212598425, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03937007874015748 }, { "epoch": 2.4914784831700043, "grad_norm": 83.3353500087414, "learning_rate": 3.4584822975127727e-07, "loss": 0.3095, "step": 11695, "success_rate.epoch.env.abd": 0.9854368932038835, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9705882352941176, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9194915254237288, "success_rate.epoch.env.math": 0.9752130131680867, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8525784753363229, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.866392022847096, "success_rate.epoch.global": 0.9089724092982837, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971374045801527, "tokens_p.mean_in_band": 0.501678466796875, "tokens_rate.above_band": 0.9703703703703703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02962962962962963 }, { "epoch": 2.4925436727737535, "grad_norm": 230.28593129715713, "learning_rate": 3.4581629573025084e-07, "loss": 0.2218, "step": 11700, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9197183098591549, "success_rate.epoch.env.math": 0.9752895752895753, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8527435610302352, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.866517397534206, "success_rate.epoch.global": 0.9091697376978105, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955065359477124, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.4936088623775032, "grad_norm": 128.89215135374454, "learning_rate": 3.4578437204620617e-07, "loss": 0.2854, "step": 11705, "success_rate.epoch.env.abd": 0.9856459330143541, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.959731543624161, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9200561009817672, "success_rate.epoch.env.math": 0.9753086419753086, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.852513966480447, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8665415747974877, "success_rate.epoch.global": 0.90914990266061, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993421052631579, "tokens_p.mean_in_band": 0.365234375, "tokens_rate.above_band": 0.9405940594059405, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0594059405940594 }, { "epoch": 2.4946740519812525, "grad_norm": 591.9981165598932, "learning_rate": 3.457524587224091e-07, "loss": 0.2783, "step": 11710, "success_rate.epoch.env.abd": 0.985781990521327, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.96, "success_rate.epoch.env.agentgym:textcraft": 0.9714285714285714, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9511111111111111, "success_rate.epoch.env.logic": 0.9201680672268907, "success_rate.epoch.env.math": 0.9753276792598303, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.852924791086351, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8666276058900659, "success_rate.epoch.global": 0.9093459961148284, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993610594795539, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9981447124304267, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0018552875695732839 }, { "epoch": 2.495739241585002, "grad_norm": 101.91699001828762, "learning_rate": 3.4572055578211756e-07, "loss": 0.2473, "step": 11715, "success_rate.epoch.env.abd": 0.9858490566037735, "success_rate.epoch.env.agentgym:alfworld": 0.8527131782945736, "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9513274336283186, "success_rate.epoch.env.logic": 0.9189944134078212, "success_rate.epoch.env.math": 0.9753656658968437, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8530884808013356, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8666612388526556, "success_rate.epoch.global": 0.9093258668964032, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0008012820512822, "tokens_p.mean_in_band": 0.7483258928571429, "tokens_rate.above_band": 0.9925768822905621, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007423117709437964 }, { "epoch": 2.4968044311887514, "grad_norm": 446.2540681917151, "learning_rate": 3.456886632485825e-07, "loss": 0.3318, "step": 11720, "success_rate.epoch.env.abd": 0.9858490566037735, "success_rate.epoch.env.agentgym:alfworld": 0.8461538461538461, "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9192200557103064, "success_rate.epoch.env.math": 0.9754035357417371, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8527777777777777, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8660801382685682, "success_rate.epoch.global": 0.9090909090909091, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982881310418904, "tokens_p.mean_in_band": 0.6337890625, "tokens_rate.above_band": 0.9883227176220807, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01167728237791932 }, { "epoch": 2.497869620792501, "grad_norm": 163.65331932638185, "learning_rate": 3.456567811450468e-07, "loss": 0.2001, "step": 11725, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8461538461538461, "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9192200557103064, "success_rate.epoch.env.math": 0.97544128933231, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8533480907581626, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661414567162891, "success_rate.epoch.global": 0.9092858674672958, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956219806763285, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9764150943396226, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02358490566037736 }, { "epoch": 2.4989348103962508, "grad_norm": 22.243719943978714, "learning_rate": 3.456249094947458e-07, "loss": 0.185, "step": 11730, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9196675900277008, "success_rate.epoch.env.math": 0.9754601226993865, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8535911602209945, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8663186976909922, "success_rate.epoch.global": 0.9094799914401883, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981409348441926, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.9943661971830986, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005633802816901409 }, { "epoch": 2.5, "grad_norm": 108.83580877913427, "learning_rate": 3.4559304832090754e-07, "loss": 0.2537, "step": 11735, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9602649006622517, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9186206896551724, "success_rate.epoch.env.math": 0.975553857906799, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8537527593818984, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8662467371451549, "success_rate.epoch.global": 0.9094597480247705, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985608552631579, "tokens_p.mean_in_band": 0.70703125, "tokens_rate.above_band": 0.95, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05 }, { "epoch": 2.5010651896037492, "grad_norm": 62.47162215038496, "learning_rate": 3.455611976467522e-07, "loss": 0.1701, "step": 11740, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9175824175824175, "success_rate.epoch.env.math": 0.9755725190839695, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8541552008805724, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8662143958479103, "success_rate.epoch.global": 0.9094395908800341, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952290076335878, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.9924242424242424, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007575757575757576 }, { "epoch": 2.502130379207499, "grad_norm": 378.8348212985776, "learning_rate": 3.455293574954922e-07, "loss": 0.1936, "step": 11745, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9605263157894737, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9178082191780822, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8540866703236424, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8662320783981219, "success_rate.epoch.global": 0.9094195194556666, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9875, "tokens_p.mean_in_band": 0.6967075892857143, "tokens_rate.above_band": 0.8653846153846154, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1346153846153846 }, { "epoch": 2.5031955688112486, "grad_norm": 80.84940404778263, "learning_rate": 3.454975278903324e-07, "loss": 0.2153, "step": 11750, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.8473282442748091, "success_rate.epoch.env.agentgym:sciworld": 0.9607843137254902, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9167803547066848, "success_rate.epoch.env.math": 0.9756468797564688, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.8544061302681992, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8661945072224931, "success_rate.epoch.global": 0.9093995332060258, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949363425925926, "tokens_p.mean_in_band": 0.779296875, "tokens_rate.above_band": 0.9818181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01818181818181818 }, { "epoch": 2.504260758414998, "grad_norm": 127.90250998608596, "learning_rate": 3.454657088544702e-07, "loss": 0.1443, "step": 11755, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.961038961038961, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9168937329700273, "success_rate.epoch.env.math": 0.9757207890743551, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8544857768052516, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8691674576569096, "success_rate.epoch.global": 0.9095913614228245, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999124343257443, "tokens_p.mean_in_band": 0.796875, "tokens_rate.above_band": 0.9947735191637631, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005226480836236934 }, { "epoch": 2.505325948018747, "grad_norm": 106.26361396706993, "learning_rate": 3.4543390041109484e-07, "loss": 0.2536, "step": 11760, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9515418502202643, "success_rate.epoch.env.logic": 0.9168937329700273, "success_rate.epoch.env.math": 0.9758490566037736, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8546448087431694, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8692164268395479, "success_rate.epoch.global": 0.9097823790407775, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981715425531915, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9947089947089947, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005291005291005291 }, { "epoch": 2.5063911376224968, "grad_norm": 93.51863281513182, "learning_rate": 3.454021025833882e-07, "loss": 0.2033, "step": 11765, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9519650655021834, "success_rate.epoch.env.logic": 0.9170068027210885, "success_rate.epoch.env.math": 0.9759217456734387, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8548827059465357, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8692934151400942, "success_rate.epoch.global": 0.9099725911870125, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0001858275520317, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9980217606330366, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0019782393669634025 }, { "epoch": 2.5074563272262465, "grad_norm": 35.81025600724148, "learning_rate": 3.453703153945243e-07, "loss": 0.0896, "step": 11770, "success_rate.epoch.env.abd": 0.985981308411215, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9612903225806452, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9521739130434783, "success_rate.epoch.env.logic": 0.9172320217096337, "success_rate.epoch.env.math": 0.9759579263711495, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8552774755168662, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8693720530308107, "success_rate.epoch.global": 0.9101620029455081, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991834916864608, "tokens_p.mean_in_band": 0.892578125, "tokens_rate.above_band": 0.9952718676122931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004728132387706856 }, { "epoch": 2.5085215168299957, "grad_norm": 46.35443262909214, "learning_rate": 3.453385388676694e-07, "loss": 0.2168, "step": 11775, "success_rate.epoch.env.abd": 0.986046511627907, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9172320217096337, "success_rate.epoch.env.math": 0.9759939984996249, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8550488599348535, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869424127213577, "success_rate.epoch.global": 0.9101406676464413, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988425925925926, "tokens_p.mean_in_band": 0.484375, "tokens_rate.above_band": 0.9830097087378641, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01699029126213592 }, { "epoch": 2.509586706433745, "grad_norm": 924.0894569565932, "learning_rate": 3.4530677302598205e-07, "loss": 0.4212, "step": 11780, "success_rate.epoch.env.abd": 0.986046511627907, "success_rate.epoch.env.agentgym:alfworld": 0.849624060150376, "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9172320217096337, "success_rate.epoch.env.math": 0.9760479041916168, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8545159545700378, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869380581788775, "success_rate.epoch.global": 0.9099099099099099, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924242424242424, "tokens_p.mean_in_band": 0.5083333333333333, "tokens_rate.above_band": 0.868421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13157894736842105 }, { "epoch": 2.5106518960374946, "grad_norm": 36.89431111688203, "learning_rate": 3.452750178926129e-07, "loss": 0.127, "step": 11785, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8507462686567164, "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.9172320217096337, "success_rate.epoch.env.math": 0.9761549925484352, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8545945945945946, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8695240173239133, "success_rate.epoch.global": 0.9100982646874347, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0005194663167105, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.5117170856412443, "grad_norm": 271.9096404052713, "learning_rate": 3.452432734907049e-07, "loss": 0.3341, "step": 11790, "success_rate.epoch.env.abd": 0.9861751152073732, "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, "success_rate.epoch.env.agentgym:sciworld": 0.9617834394904459, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.9173441734417345, "success_rate.epoch.env.math": 0.9762258543833581, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8548300053966541, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696683820203206, "success_rate.epoch.global": 0.910285833507198, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982358870967742, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9763779527559056, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023622047244094488 }, { "epoch": 2.5127822752449935, "grad_norm": 96.73718424779733, "learning_rate": 3.452115398433931e-07, "loss": 0.2184, "step": 11795, "success_rate.epoch.env.abd": 0.9862385321100917, "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9525862068965517, "success_rate.epoch.env.logic": 0.9174560216508796, "success_rate.epoch.env.math": 0.9762787249814677, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8551427032848681, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697395375260875, "success_rate.epoch.global": 0.9104726212783677, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983928571428572, "tokens_p.mean_in_band": 0.78515625, "tokens_rate.above_band": 0.9887005649717514, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011299435028248588 }, { "epoch": 2.513847464848743, "grad_norm": 115.53188679644788, "learning_rate": 3.4517981697380487e-07, "loss": 0.2252, "step": 11800, "success_rate.epoch.env.abd": 0.9862385321100917, "success_rate.epoch.env.agentgym:alfworld": 0.8518518518518519, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9527896995708155, "success_rate.epoch.env.logic": 0.9174560216508796, "success_rate.epoch.env.math": 0.9763488543976349, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.855531686358754, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697997743592073, "success_rate.epoch.global": 0.9106586328693123, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9952847633136095, "tokens_p.mean_in_band": 0.8203125, "tokens_rate.above_band": 0.9941176470588236, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0058823529411764705 }, { "epoch": 2.5149126544524925, "grad_norm": 44.20564611105242, "learning_rate": 3.451481049050594e-07, "loss": 0.2193, "step": 11805, "success_rate.epoch.env.abd": 0.9862385321100917, "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, "success_rate.epoch.env.agentgym:sciworld": 0.9620253164556962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9531914893617022, "success_rate.epoch.env.logic": 0.9175675675675675, "success_rate.epoch.env.math": 0.9763663220088626, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8559185859667916, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699822314088049, "success_rate.epoch.global": 0.9108438731080241, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997800736497545, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.515977844056242, "grad_norm": 167.84103061086964, "learning_rate": 3.4511640366026847e-07, "loss": 0.3937, "step": 11810, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9176788124156545, "success_rate.epoch.env.math": 0.9763663220088626, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8558462359850507, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869616113064085, "success_rate.epoch.global": 0.9106145251396648, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952766021765417, "tokens_p.mean_in_band": 0.6990209651898734, "tokens_rate.above_band": 0.91280353200883, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08719646799116998 }, { "epoch": 2.5170430336599914, "grad_norm": 316.42876209930756, "learning_rate": 3.4508471326253555e-07, "loss": 0.5389, "step": 11815, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, "success_rate.epoch.env.agentgym:sciworld": 0.9622641509433962, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9176788124156545, "success_rate.epoch.env.math": 0.9764359351988218, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8557743480574774, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696159062697565, "success_rate.epoch.global": 0.9105926078876729, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9166666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895833333333334, "tokens_p.mean_in_band": 0.55078125, "tokens_rate.above_band": 0.9590163934426229, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040983606557377046 }, { "epoch": 2.518108223263741, "grad_norm": 84.00079833773995, "learning_rate": 3.4505303373495643e-07, "loss": 0.2716, "step": 11820, "success_rate.epoch.env.abd": 0.9817351598173516, "success_rate.epoch.env.agentgym:alfworld": 0.8529411764705882, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.9177897574123989, "success_rate.epoch.env.math": 0.9765051395007343, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8555496548061604, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696332976428423, "success_rate.epoch.global": 0.9105707809602308, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972098214285714, "tokens_p.mean_in_band": 0.228125, "tokens_rate.above_band": 0.9438202247191011, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056179775280898875 }, { "epoch": 2.5191734128674903, "grad_norm": 34.11316047221333, "learning_rate": 3.450213651006189e-07, "loss": 0.2427, "step": 11825, "success_rate.epoch.env.abd": 0.9818181818181818, "success_rate.epoch.env.agentgym:alfworld": 0.8540145985401459, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9533898305084746, "success_rate.epoch.env.logic": 0.918010752688172, "success_rate.epoch.env.math": 0.9765395894428153, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8558558558558559, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697894876735636, "success_rate.epoch.global": 0.910754678182192, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986213235294118, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9951219512195122, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004878048780487805 }, { "epoch": 2.52023860247124, "grad_norm": 39.41352343538918, "learning_rate": 3.449897073826029e-07, "loss": 0.1307, "step": 11830, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.8489208633093526, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9181208053691275, "success_rate.epoch.env.math": 0.9765567765567765, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8561607615018508, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8693910647883314, "success_rate.epoch.global": 0.9107326082495383, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995909685863874, "tokens_p.mean_in_band": 0.6865234375, "tokens_rate.above_band": 0.9986928104575163, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00130718954248366 }, { "epoch": 2.5213037920749892, "grad_norm": 64.30824711847141, "learning_rate": 3.4495806060398017e-07, "loss": 0.165, "step": 11835, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9181208053691275, "success_rate.epoch.env.math": 0.9766763848396501, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8563127311146329, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8695138570234501, "success_rate.epoch.global": 0.9109154208478395, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984375, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9900990099009901, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009900990099009901 }, { "epoch": 2.522368981678739, "grad_norm": 104.599357907905, "learning_rate": 3.4492642478781487e-07, "loss": 0.3187, "step": 11840, "success_rate.epoch.env.abd": 0.9819004524886877, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9535864978902954, "success_rate.epoch.env.logic": 0.9186666666666666, "success_rate.epoch.env.math": 0.976693372177713, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8560885608856088, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8695446459695026, "success_rate.epoch.global": 0.9108931126098508, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976615646258503, "tokens_p.mean_in_band": 0.46875, "tokens_rate.above_band": 0.9671052631578947, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03289473684210526 }, { "epoch": 2.523434171282488, "grad_norm": 17.943847670638224, "learning_rate": 3.4489479995716285e-07, "loss": 0.2725, "step": 11845, "success_rate.epoch.env.abd": 0.9819819819819819, "success_rate.epoch.env.agentgym:alfworld": 0.85, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9537815126050421, "success_rate.epoch.env.logic": 0.9186666666666666, "success_rate.epoch.env.math": 0.9767610748002905, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8563913729615992, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869603469506467, "success_rate.epoch.global": 0.9110748521313482, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990770042194093, "tokens_p.mean_in_band": 0.70703125, "tokens_rate.above_band": 0.9978947368421053, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002105263157894737 }, { "epoch": 2.524499360886238, "grad_norm": 54.977073550462976, "learning_rate": 3.4486318613507205e-07, "loss": 0.1834, "step": 11850, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, "success_rate.epoch.env.agentgym:sciworld": 0.9625, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.918774966711052, "success_rate.epoch.env.math": 0.9768115942028985, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8566176470588235, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8697601153620101, "success_rate.epoch.global": 0.9112558518216975, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971033868092691, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9929203539823008, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007079646017699115 }, { "epoch": 2.525564550489987, "grad_norm": 105.62523633771117, "learning_rate": 3.448315833445824e-07, "loss": 0.3318, "step": 11855, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, "success_rate.epoch.env.agentgym:sciworld": 0.9627329192546584, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.918774966711052, "success_rate.epoch.env.math": 0.9747292418772563, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8563941299790356, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8695716653483035, "success_rate.epoch.global": 0.9106236034938046, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.4, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7166666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9933176100628931, "tokens_p.mean_in_band": 0.4583625793457031, "tokens_rate.above_band": 0.9520958083832335, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04790419161676647 }, { "epoch": 2.5266297400937368, "grad_norm": 46.006171320987605, "learning_rate": 3.447999916087259e-07, "loss": 0.2269, "step": 11860, "success_rate.epoch.env.abd": 0.9820627802690582, "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9188829787234043, "success_rate.epoch.env.math": 0.9748020158387329, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.856694560669456, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696363253821722, "success_rate.epoch.global": 0.9108047841070342, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9943647540983607, "tokens_p.mean_in_band": 0.7125, "tokens_rate.above_band": 0.973404255319149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026595744680851064 }, { "epoch": 2.527694929697486, "grad_norm": 282.99357569758575, "learning_rate": 3.447684109505262e-07, "loss": 0.2537, "step": 11865, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9190981432360743, "success_rate.epoch.env.math": 0.9748563218390804, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8569937369519833, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696953002612033, "success_rate.epoch.global": 0.910985231640704, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959608843537415, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.98, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02 }, { "epoch": 2.5287601193012357, "grad_norm": 0.0, "learning_rate": 3.447368413929991e-07, "loss": 0.2368, "step": 11870, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, "success_rate.epoch.env.agentgym:sciworld": 0.9629629629629629, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.9194187582562747, "success_rate.epoch.env.math": 0.9749283667621776, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8572173006774362, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869751320594726, "success_rate.epoch.global": 0.9111649505350293, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9871323529411765, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9883720930232558, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011627906976744186 }, { "epoch": 2.529825308904985, "grad_norm": 71.12536466098928, "learning_rate": 3.4470528295915243e-07, "loss": 0.2253, "step": 11875, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.851063829787234, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9539748953974896, "success_rate.epoch.env.logic": 0.919631093544137, "success_rate.epoch.env.math": 0.974964234620887, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8575883575883576, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8698282734259635, "success_rate.epoch.global": 0.9113439451944388, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998046875, "tokens_p.mean_in_band": 0.7, "tokens_rate.above_band": 0.9746192893401016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025380710659898477 }, { "epoch": 2.5308904985087346, "grad_norm": 200.85363222817136, "learning_rate": 3.4467373567198557e-07, "loss": 0.4284, "step": 11880, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.852112676056338, "success_rate.epoch.env.agentgym:sciworld": 0.9631901840490797, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9541666666666667, "success_rate.epoch.env.logic": 0.919631093544137, "success_rate.epoch.env.math": 0.9750712250712251, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8572170301142263, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699170262000077, "success_rate.epoch.global": 0.9113211341242711, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996421755725191, "tokens_p.mean_in_band": 0.3802083333333333, "tokens_rate.above_band": 0.9924242424242424, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007575757575757576 }, { "epoch": 2.5319556881124843, "grad_norm": 105.95514999887611, "learning_rate": 3.4464219955449003e-07, "loss": 0.2484, "step": 11885, "success_rate.epoch.env.abd": 0.9821428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.8531468531468531, "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9541666666666667, "success_rate.epoch.env.logic": 0.9185282522996058, "success_rate.epoch.env.math": 0.9751243781094527, "success_rate.epoch.env.sat": 0.13333333333333333, "success_rate.epoch.env.science": 0.8573651452282157, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699494856632316, "success_rate.epoch.global": 0.9112984146096729, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997965976331361, "tokens_p.mean_in_band": 0.40625, "tokens_rate.above_band": 0.9835111542192047, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016488845780795344 }, { "epoch": 2.5330208777162335, "grad_norm": 82.42247365060848, "learning_rate": 3.4461067462964906e-07, "loss": 0.3629, "step": 11890, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8531468531468531, "success_rate.epoch.env.agentgym:sciworld": 0.9634146341463414, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9185282522996058, "success_rate.epoch.env.math": 0.9751420454545454, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8571428571428571, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8695786203250474, "success_rate.epoch.global": 0.9110755057079912, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976973684210526, "tokens_p.mean_in_band": 0.6907894736842105, "tokens_rate.above_band": 0.9375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0625 }, { "epoch": 2.5340860673199828, "grad_norm": 59.630699271046154, "learning_rate": 3.445791609204379e-07, "loss": 0.3481, "step": 11895, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8531468531468531, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9186351706036745, "success_rate.epoch.env.math": 0.9751948972360028, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8575116159008777, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.869646825628099, "success_rate.epoch.global": 0.9112532480511693, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953125, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.9859154929577465, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014084507042253521 }, { "epoch": 2.5351512569237324, "grad_norm": 57.46211290920506, "learning_rate": 3.445476584498234e-07, "loss": 0.2065, "step": 11900, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8531468531468531, "success_rate.epoch.env.agentgym:sciworld": 0.9636363636363636, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.96, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9186351706036745, "success_rate.epoch.env.math": 0.9752650176678446, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8574369531652084, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8696464126913874, "success_rate.epoch.global": 0.9112307999202075, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9166666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936342592592593, "tokens_p.mean_in_band": 0.404296875, "tokens_rate.above_band": 0.9642857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03571428571428571 }, { "epoch": 2.536216446527482, "grad_norm": 113.43745596251871, "learning_rate": 3.4451616724076433e-07, "loss": 0.2582, "step": 11905, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8541666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9615384615384616, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9174311926605505, "success_rate.epoch.env.math": 0.9752824858757062, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8578028747433265, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8698242985960707, "success_rate.epoch.global": 0.9112084411706152, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987148268398268, "tokens_p.mean_in_band": 0.5125558035714286, "tokens_rate.above_band": 0.9705882352941176, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029411764705882353 }, { "epoch": 2.5372816361312314, "grad_norm": 133.23284597020634, "learning_rate": 3.444846873162113e-07, "loss": 0.1637, "step": 11910, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8541666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9543568464730291, "success_rate.epoch.env.logic": 0.9164490861618799, "success_rate.epoch.env.math": 0.9752824858757062, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8581669226830517, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8698976115838485, "success_rate.epoch.global": 0.9111861712696205, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972064393939394, "tokens_p.mean_in_band": 0.7469618055555556, "tokens_rate.above_band": 0.9865470852017937, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013452914798206279 }, { "epoch": 2.5383468257349806, "grad_norm": 94.25149636356029, "learning_rate": 3.4445321869910676e-07, "loss": 0.1338, "step": 11915, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8541666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9753173483779972, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.858456821665815, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699640615895091, "success_rate.epoch.global": 0.9113622843545509, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0006613756613756, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.5394120153387303, "grad_norm": 61.80527106212896, "learning_rate": 3.4442176141238465e-07, "loss": 0.2954, "step": 11920, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8551724137931035, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9154746423927178, "success_rate.epoch.env.math": 0.9753867791842475, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8582355940846507, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8699333275965611, "success_rate.epoch.global": 0.9111418959034238, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980852601156069, "tokens_p.mean_in_band": 0.4550189393939394, "tokens_rate.above_band": 0.9632516703786191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036748329621380846 }, { "epoch": 2.54047720494248, "grad_norm": 233.32823743982402, "learning_rate": 3.443903154789709e-07, "loss": 0.1735, "step": 11925, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8551724137931035, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9547325102880658, "success_rate.epoch.env.logic": 0.9155844155844156, "success_rate.epoch.env.math": 0.9754040758959944, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.858739837398374, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870007724774723, "success_rate.epoch.global": 0.9113174007505431, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985346585117227, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9989816700610998, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0010183299389002036 }, { "epoch": 2.541542394546229, "grad_norm": 21.235977992283846, "learning_rate": 3.44358880921783e-07, "loss": 0.5277, "step": 11930, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8561643835616438, "success_rate.epoch.env.agentgym:sciworld": 0.963855421686747, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9549180327868853, "success_rate.epoch.env.logic": 0.9156939040207522, "success_rate.epoch.env.math": 0.9747368421052631, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8590978205778003, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700966093285767, "success_rate.epoch.global": 0.9112950916617386, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979564032697548, "tokens_p.mean_in_band": 0.633056640625, "tokens_rate.above_band": 0.9839142091152815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0160857908847185 }, { "epoch": 2.5426075841499785, "grad_norm": 68.92423608218708, "learning_rate": 3.4432745776373033e-07, "loss": 0.2849, "step": 11935, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8561643835616438, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9549180327868853, "success_rate.epoch.env.logic": 0.9158031088082902, "success_rate.epoch.env.math": 0.9741258741258741, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8588056680161943, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700441110419604, "success_rate.epoch.global": 0.9110761361400748, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957627118644068, "tokens_p.mean_in_band": 0.5522017045454546, "tokens_rate.above_band": 0.9797047970479705, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02029520295202952 }, { "epoch": 2.543672773753728, "grad_norm": 61.590577156416416, "learning_rate": 3.442960460277138e-07, "loss": 0.2655, "step": 11940, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8561643835616438, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9551020408163265, "success_rate.epoch.env.logic": 0.9148387096774193, "success_rate.epoch.env.math": 0.9741620111731844, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8590909090909091, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700023825892873, "success_rate.epoch.global": 0.9110543883762027, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988162878787878, "tokens_p.mean_in_band": 0.59661865234375, "tokens_rate.above_band": 0.99, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01 }, { "epoch": 2.544737963357478, "grad_norm": 273.7021754812007, "learning_rate": 3.442646457366261e-07, "loss": 0.3168, "step": 11945, "success_rate.epoch.env.abd": 0.9823788546255506, "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9551020408163265, "success_rate.epoch.env.logic": 0.9149484536082474, "success_rate.epoch.env.math": 0.9742698191933241, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.858728557013118, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870078171085141, "success_rate.epoch.global": 0.9110327258475407, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982839595375722, "tokens_p.mean_in_band": 0.2734375, "tokens_rate.above_band": 0.9857549857549858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014245014245014245 }, { "epoch": 2.545803152961227, "grad_norm": 188.12848402098945, "learning_rate": 3.4423325691335146e-07, "loss": 0.4486, "step": 11950, "success_rate.epoch.env.abd": 0.9824561403508771, "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9552845528455285, "success_rate.epoch.env.logic": 0.915057915057915, "success_rate.epoch.env.math": 0.974323386537127, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8585095669687814, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870096701676383, "success_rate.epoch.global": 0.9110111480539801, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956430288461539, "tokens_p.mean_in_band": 0.6462656656901041, "tokens_rate.above_band": 0.9454545454545454, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05454545454545454 }, { "epoch": 2.5468683425649763, "grad_norm": 986.7548172335769, "learning_rate": 3.442018795807659e-07, "loss": 0.4894, "step": 11955, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.8571428571428571, "success_rate.epoch.env.agentgym:sciworld": 0.9640718562874252, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.9151670951156813, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8587939698492463, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8698088954837633, "success_rate.epoch.global": 0.9109896544993168, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9904011285574092, "tokens_p.mean_in_band": 0.501787781084656, "tokens_rate.above_band": 0.7294201861130994, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2705798138869005 }, { "epoch": 2.547933532168726, "grad_norm": 43.304350197091985, "learning_rate": 3.4417051376173694e-07, "loss": 0.1971, "step": 11960, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.86, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9151670951156813, "success_rate.epoch.env.math": 0.9743944636678201, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8585047666833918, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8700826784554251, "success_rate.epoch.global": 0.9109682446912137, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971291415662651, "tokens_p.mean_in_band": 0.6881510416666666, "tokens_rate.above_band": 0.991044776119403, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008955223880597015 }, { "epoch": 2.5489987217724757, "grad_norm": 59.336164353104444, "learning_rate": 3.4413915947912385e-07, "loss": 0.3882, "step": 11965, "success_rate.epoch.env.abd": 0.982532751091703, "success_rate.epoch.env.agentgym:alfworld": 0.8609271523178808, "success_rate.epoch.env.agentgym:sciworld": 0.9642857142857143, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9153846153846154, "success_rate.epoch.env.math": 0.9737387698686939, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8588588588588588, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701593212702573, "success_rate.epoch.global": 0.9109469181411628, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964539007092199, "tokens_p.mean_in_band": 0.0031890869140625, "tokens_rate.above_band": 0.9976415094339622, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0023584905660377358 }, { "epoch": 2.550063911376225, "grad_norm": 96.63634927298644, "learning_rate": 3.4410781675577737e-07, "loss": 0.3646, "step": 11970, "success_rate.epoch.env.abd": 0.9826086956521739, "success_rate.epoch.env.agentgym:alfworld": 0.8618421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9143222506393862, "success_rate.epoch.env.math": 0.9737750172532781, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8585707146426786, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8701491360941962, "success_rate.epoch.global": 0.9107316126528237, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.861111111111111, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984358359957402, "tokens_p.mean_in_band": 0.4806640625, "tokens_rate.above_band": 0.9791449426485923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020855057351407715 }, { "epoch": 2.5511291009799746, "grad_norm": 32.12220785385132, "learning_rate": 3.4407648561453977e-07, "loss": 0.2023, "step": 11975, "success_rate.epoch.env.abd": 0.9826086956521739, "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, "success_rate.epoch.env.agentgym:sciworld": 0.9644970414201184, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9144316730523627, "success_rate.epoch.env.math": 0.9738111647139903, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8589935226706528, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.870282897064898, "success_rate.epoch.global": 0.9109045128801084, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984186746987952, "tokens_p.mean_in_band": 0.759765625, "tokens_rate.above_band": 0.9952038369304557, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004796163069544364 }, { "epoch": 2.552194290583724, "grad_norm": 53.54048083969894, "learning_rate": 3.440451660782451e-07, "loss": 0.2448, "step": 11980, "success_rate.epoch.env.abd": 0.9826839826839827, "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9145408163265306, "success_rate.epoch.env.math": 0.9738831615120275, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8592039800995025, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703443266526878, "success_rate.epoch.global": 0.911076744635608, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998077876984127, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.998019801980198, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0019801980198019802 }, { "epoch": 2.5532594801874735, "grad_norm": 70.7509008739078, "learning_rate": 3.4401385816971866e-07, "loss": 0.2233, "step": 11985, "success_rate.epoch.env.abd": 0.9826839826839827, "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9146496815286624, "success_rate.epoch.env.math": 0.9739726027397261, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.858987090367428, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703426372615746, "success_rate.epoch.global": 0.9110553733359058, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957540760869565, "tokens_p.mean_in_band": 0.76318359375, "tokens_rate.above_band": 0.9583333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041666666666666664 }, { "epoch": 2.5543246697912227, "grad_norm": 182.2298132980826, "learning_rate": 3.4398256191177756e-07, "loss": 0.2803, "step": 11990, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8627450980392157, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9150823827629911, "success_rate.epoch.env.math": 0.974025974025974, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8586309523809523, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8703612347651905, "success_rate.epoch.global": 0.9110340843443097, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969512195121951, "tokens_p.mean_in_band": 0.498046875, "tokens_rate.above_band": 0.9534883720930233, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046511627906976744 }, { "epoch": 2.5553898593949724, "grad_norm": 495.6901149480837, "learning_rate": 3.439512773272302e-07, "loss": 0.2772, "step": 11995, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.9722222222222222, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.9518072289156626, "success_rate.epoch.env.logic": 0.9152970922882427, "success_rate.epoch.env.math": 0.9740967961826857, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8588410104011887, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8704873125196765, "success_rate.epoch.global": 0.9112050739957717, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984984984984985, "tokens_p.mean_in_band": 0.7640625, "tokens_rate.above_band": 0.985207100591716, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014792899408284023 }, { "epoch": 2.5564550489987217, "grad_norm": 46.33575643759948, "learning_rate": 3.439200044388766e-07, "loss": 0.261, "step": 12000, "success_rate.epoch.env.abd": 0.9827586206896551, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.952, "success_rate.epoch.env.logic": 0.9155107187894073, "success_rate.epoch.env.math": 0.9741144414168937, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.858201581027668, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705359819921256, "success_rate.epoch.global": 0.9109917513907538, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979003359462486, "tokens_p.mean_below_band": 2.648448571562767e-09, "tokens_p.mean_in_band": 0.44557291666666665, "tokens_rate.above_band": 0.9823982398239824, "tokens_rate.below_band": 0.0011001100110011, "tokens_rate.in_band": 0.0165016501650165 }, { "epoch": 2.5575202386024714, "grad_norm": 75.8550203441575, "learning_rate": 3.438887432695082e-07, "loss": 0.2481, "step": 12005, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9629629629629629, "success_rate.epoch.env.ded": 0.952191235059761, "success_rate.epoch.env.logic": 0.9156171284634761, "success_rate.epoch.env.math": 0.9741847826086957, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8584114454859398, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8705952408757619, "success_rate.epoch.global": 0.9111621673367797, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981602186711522, "tokens_p.mean_in_band": 0.814453125, "tokens_rate.above_band": 0.998320738874895, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016792611251049538 }, { "epoch": 2.5585854282062206, "grad_norm": 67.08803362906818, "learning_rate": 3.4385749384190794e-07, "loss": 0.2412, "step": 12010, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.8636363636363636, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.952191235059761, "success_rate.epoch.env.logic": 0.9159347553324969, "success_rate.epoch.env.math": 0.9742023082145281, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8586903003446578, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8708832668530271, "success_rate.epoch.global": 0.9113319319701891, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973872950819672, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.5596506178099703, "grad_norm": 237.71341347317403, "learning_rate": 3.438262561788502e-07, "loss": 0.4032, "step": 12015, "success_rate.epoch.env.abd": 0.9828326180257511, "success_rate.epoch.env.agentgym:alfworld": 0.864516129032258, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9525691699604744, "success_rate.epoch.env.logic": 0.9159347553324969, "success_rate.epoch.env.math": 0.9742895805142084, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8588293162813576, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710181749014805, "success_rate.epoch.global": 0.911501049017738, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983127376425855, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.5607158074137195, "grad_norm": 62.7319749104168, "learning_rate": 3.437950303031007e-07, "loss": 0.1932, "step": 12020, "success_rate.epoch.env.abd": 0.9829059829059829, "success_rate.epoch.env.agentgym:alfworld": 0.864516129032258, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.9161451814768461, "success_rate.epoch.env.math": 0.9743243243243244, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8591065292096219, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710893097482348, "success_rate.epoch.global": 0.9116695221778032, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971498371335505, "tokens_p.mean_in_band": 0.8567708333333334, "tokens_rate.above_band": 0.9967532467532467, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003246753246753247 }, { "epoch": 2.561780997017469, "grad_norm": 129.25711300667405, "learning_rate": 3.4376381623741664e-07, "loss": 0.2234, "step": 12025, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.864516129032258, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.91625, "success_rate.epoch.env.math": 0.9744107744107744, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8592447278077489, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8711324308172689, "success_rate.epoch.global": 0.9118373551206537, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990354938271605, "tokens_p.mean_in_band": 0.8203125, "tokens_rate.above_band": 0.9938650306748467, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006134969325153374 }, { "epoch": 2.5628461866212184, "grad_norm": 130.4999063049413, "learning_rate": 3.437326140045467e-07, "loss": 0.2314, "step": 12030, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.864516129032258, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9529411764705882, "success_rate.epoch.env.logic": 0.916354556803995, "success_rate.epoch.env.math": 0.9744107744107744, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8593063019052272, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8711643764409281, "success_rate.epoch.global": 0.9118149061255453, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966216216216216, "tokens_p.mean_in_band": 0.5714285714285714, "tokens_rate.above_band": 0.9694323144104804, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03056768558951965 }, { "epoch": 2.563911376224968, "grad_norm": 285.1381440444271, "learning_rate": 3.437014236272307e-07, "loss": 0.192, "step": 12035, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9647058823529412, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9153175591531756, "success_rate.epoch.env.math": 0.9744623655913979, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8595121951219512, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8711891761344098, "success_rate.epoch.global": 0.9117925421162218, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996292372881356, "tokens_p.mean_in_band": 0.684326171875, "tokens_rate.above_band": 0.9866220735785953, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013377926421404682 }, { "epoch": 2.5649765658287174, "grad_norm": 443.55851214993623, "learning_rate": 3.436702451282e-07, "loss": 0.3052, "step": 12040, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.953307392996109, "success_rate.epoch.env.logic": 0.9156327543424317, "success_rate.epoch.env.math": 0.9745137491616365, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8596491228070176, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712702941152718, "success_rate.epoch.global": 0.9119591913848479, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974626068376068, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9989327641408752, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0010672358591248667 }, { "epoch": 2.566041755432467, "grad_norm": 41.880318313677236, "learning_rate": 3.436390785301774e-07, "loss": 0.2218, "step": 12045, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8653846153846154, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9157372986369269, "success_rate.epoch.env.math": 0.9746158984635939, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8597857838364168, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713179608173415, "success_rate.epoch.global": 0.9121252121440694, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998624213836478, "tokens_p.mean_in_band": 0.6768973214285714, "tokens_rate.above_band": 0.9784615384615385, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021538461538461538 }, { "epoch": 2.5671069450362163, "grad_norm": 136.0080793586695, "learning_rate": 3.436079238558768e-07, "loss": 0.2955, "step": 12050, "success_rate.epoch.env.abd": 0.9830508474576272, "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9158415841584159, "success_rate.epoch.env.math": 0.9746497665110073, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8601941747572815, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714455942102245, "success_rate.epoch.global": 0.9122906079427818, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979383680555556, "tokens_p.mean_in_band": 0.7408854166666666, "tokens_rate.above_band": 0.9896907216494846, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010309278350515464 }, { "epoch": 2.568172134639966, "grad_norm": 82.02322753070825, "learning_rate": 3.4357678112800344e-07, "loss": 0.3307, "step": 12055, "success_rate.epoch.env.abd": 0.9831932773109243, "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9160493827160494, "success_rate.epoch.env.math": 0.9746835443037974, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.8604651162790697, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715051349125434, "success_rate.epoch.global": 0.9124553823032124, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9940664556962026, "tokens_p.mean_in_band": 0.783203125, "tokens_rate.above_band": 0.9753086419753086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024691358024691357 }, { "epoch": 2.5692373242437156, "grad_norm": 54.3420676600815, "learning_rate": 3.43545650369254e-07, "loss": 0.3046, "step": 12060, "success_rate.epoch.env.abd": 0.9832635983263598, "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, "success_rate.epoch.env.agentgym:sciworld": 0.9649122807017544, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9161528976572133, "success_rate.epoch.env.math": 0.9740518962075848, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.860318994683422, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714665574434225, "success_rate.epoch.global": 0.9122445152822052, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.86, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988111413043478, "tokens_p.mean_in_band": 0.3940972222222222, "tokens_rate.above_band": 0.9761273209549072, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023872679045092837 }, { "epoch": 2.570302513847465, "grad_norm": 216.6815496720348, "learning_rate": 3.435145316023163e-07, "loss": 0.2443, "step": 12065, "success_rate.epoch.env.abd": 0.9832635983263598, "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9163591635916359, "success_rate.epoch.env.math": 0.9740863787375416, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.860655737704918, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715376019755943, "success_rate.epoch.global": 0.9124087591240876, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971181556195965, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9914285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008571428571428572 }, { "epoch": 2.571367703451214, "grad_norm": 126.43073394207107, "learning_rate": 3.4348342484986954e-07, "loss": 0.2138, "step": 12070, "success_rate.epoch.env.abd": 0.9833333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.8662420382165605, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9164619164619164, "success_rate.epoch.env.math": 0.9741379310344828, "success_rate.epoch.env.sat": 0.12903225806451613, "success_rate.epoch.env.science": 0.860990860990861, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715884350174249, "success_rate.epoch.global": 0.9125723893144031, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.994552752293578, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.990909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00909090909090909 }, { "epoch": 2.572432893054964, "grad_norm": 154.7327436373478, "learning_rate": 3.43452330134584e-07, "loss": 0.1627, "step": 12075, "success_rate.epoch.env.abd": 0.9834710743801653, "success_rate.epoch.env.agentgym:alfworld": 0.8670886075949367, "success_rate.epoch.env.agentgym:sciworld": 0.9651162790697675, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9165644171779141, "success_rate.epoch.env.math": 0.974155069582505, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8612578012481997, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713464924612476, "success_rate.epoch.global": 0.9125489464851763, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970346715328468, "tokens_p.mean_in_band": 0.660888671875, "tokens_rate.above_band": 0.9625292740046838, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03747072599531616 }, { "epoch": 2.5734980826587135, "grad_norm": 175.87832004518725, "learning_rate": 3.434212474791214e-07, "loss": 0.1704, "step": 12080, "success_rate.epoch.env.abd": 0.9834710743801653, "success_rate.epoch.env.agentgym:alfworld": 0.8670886075949367, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9167686658506732, "success_rate.epoch.env.math": 0.9741892786234282, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8615900383141762, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714167047144327, "success_rate.epoch.global": 0.9127117066815559, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9952330508474576, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.5745632722624627, "grad_norm": 25.00494534986744, "learning_rate": 3.4339017690613447e-07, "loss": 0.1206, "step": 12085, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9536679536679536, "success_rate.epoch.env.logic": 0.9167686658506732, "success_rate.epoch.env.math": 0.9742063492063492, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8615090735434575, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714992055795269, "success_rate.epoch.global": 0.9126880921419283, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971774193548387, "tokens_p.mean_in_band": 0.7428385416666666, "tokens_rate.above_band": 0.9810126582278481, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0189873417721519 }, { "epoch": 2.575628461866212, "grad_norm": 86.39718190365788, "learning_rate": 3.4335911843826724e-07, "loss": 0.2611, "step": 12090, "success_rate.epoch.env.abd": 0.9836065573770492, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9156479217603912, "success_rate.epoch.env.math": 0.9742574257425742, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8617731172545281, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714582429426913, "success_rate.epoch.global": 0.9126645651770814, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0002976190476192, "tokens_p.mean_in_band": 0.611328125, "tokens_rate.above_band": 0.9897172236503856, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010282776349614395 }, { "epoch": 2.5766936514699617, "grad_norm": 360.3867822448883, "learning_rate": 3.433280720981549e-07, "loss": 0.4524, "step": 12095, "success_rate.epoch.env.abd": 0.983739837398374, "success_rate.epoch.env.agentgym:alfworld": 0.8625, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9158536585365854, "success_rate.epoch.env.math": 0.974291364535267, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8614945264159923, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8709736826199449, "success_rate.epoch.global": 0.9124560429391079, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986822289156626, "tokens_p.mean_in_band": 0.5602678571428571, "tokens_rate.above_band": 0.9595375722543352, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04046242774566474 }, { "epoch": 2.5777588410737113, "grad_norm": 138.625620808841, "learning_rate": 3.4329703790842384e-07, "loss": 0.178, "step": 12100, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9655172413793104, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9159561510353228, "success_rate.epoch.env.math": 0.9743421052631579, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8616920152091255, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8710951271317996, "success_rate.epoch.global": 0.9126177720302975, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981930272108843, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9966101694915255, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003389830508474576 }, { "epoch": 2.5788240306774606, "grad_norm": 20.383299070372694, "learning_rate": 3.432660158916915e-07, "loss": 0.2621, "step": 12105, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9159561510353228, "success_rate.epoch.env.math": 0.9743926460932371, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.862085308056872, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712399688559072, "success_rate.epoch.global": 0.9127789046653144, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978448275862069, "tokens_p.mean_in_band": 0.6067708333333334, "tokens_rate.above_band": 0.9854368932038835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014563106796116505 }, { "epoch": 2.57988922028121, "grad_norm": 164.3190243933171, "learning_rate": 3.4323500607056656e-07, "loss": 0.1043, "step": 12110, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9653179190751445, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9162621359223301, "success_rate.epoch.env.math": 0.9744597249508841, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8622811168954093, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8712916845452882, "success_rate.epoch.global": 0.9129394441376771, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9884020618556701, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9797979797979798, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020202020202020204 }, { "epoch": 2.5809544098849595, "grad_norm": 252.57942758067566, "learning_rate": 3.4320400846764867e-07, "loss": 0.2068, "step": 12115, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9659090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.972972972972973, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9543726235741445, "success_rate.epoch.env.logic": 0.9162621359223301, "success_rate.epoch.env.math": 0.9744931327665141, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8624763705103969, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713979998482848, "success_rate.epoch.global": 0.9130993937167003, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993822324011572, "tokens_p.mean_in_band": 0.6627604166666666, "tokens_rate.above_band": 0.9985556090515166, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0014443909484833895 }, { "epoch": 2.582019599488709, "grad_norm": 41.87077369208928, "learning_rate": 3.4317302310552874e-07, "loss": 0.3314, "step": 12120, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9543726235741445, "success_rate.epoch.env.logic": 0.9162621359223301, "success_rate.epoch.env.math": 0.9745430809399478, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8618576143328619, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714284574442307, "success_rate.epoch.global": 0.9128919860627178, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982108778625954, "tokens_p.mean_in_band": 0.380859375, "tokens_rate.above_band": 0.9562043795620438, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043795620437956206 }, { "epoch": 2.5830847890924584, "grad_norm": 20.958413798253876, "learning_rate": 3.4314205000678866e-07, "loss": 0.1431, "step": 12125, "success_rate.epoch.env.abd": 0.9839357429718876, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9162621359223301, "success_rate.epoch.env.math": 0.974559686888454, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.862312030075188, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714928781616937, "success_rate.epoch.global": 0.9130514369394106, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961301597869507, "tokens_p.mean_in_band": 0.6979166666666666, "tokens_rate.above_band": 0.9960212201591512, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003978779840848806 }, { "epoch": 2.5841499786962077, "grad_norm": 56.63666614541662, "learning_rate": 3.431110891940014e-07, "loss": 0.1411, "step": 12130, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9661016949152542, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9165659008464329, "success_rate.epoch.env.math": 0.9745762711864406, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8625703564727955, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715669199102783, "success_rate.epoch.global": 0.9132103051342957, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986631016042781, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9946808510638298, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005319148936170213 }, { "epoch": 2.5852151682999573, "grad_norm": 89.9056166821671, "learning_rate": 3.4308014068973094e-07, "loss": 0.3164, "step": 12135, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9662921348314607, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9746258945998699, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8624239588207768, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715845955009921, "success_rate.epoch.global": 0.9131862119277767, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996830628803245, "tokens_p.mean_in_band": 0.5598958333333334, "tokens_rate.above_band": 0.9879759519038076, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012024048096192385 }, { "epoch": 2.586280357903707, "grad_norm": 87.15984604395845, "learning_rate": 3.4304920451653235e-07, "loss": 0.2294, "step": 12140, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9664804469273743, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.916767189384801, "success_rate.epoch.env.math": 0.9746753246753247, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8627450980392157, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716445413289867, "success_rate.epoch.global": 0.9133442563262334, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972133757961783, "tokens_p.mean_in_band": 0.6809895833333334, "tokens_rate.above_band": 0.98125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01875 }, { "epoch": 2.5873455475074563, "grad_norm": 24.81964765900838, "learning_rate": 3.430182806969517e-07, "loss": 0.3304, "step": 12145, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9664804469273743, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9157641395908543, "success_rate.epoch.env.math": 0.9747409326424871, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.862937062937063, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715767706990835, "success_rate.epoch.global": 0.9133200072687625, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980418797953964, "tokens_p.mean_in_band": 0.5228794642857143, "tokens_rate.above_band": 0.9654320987654321, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0345679012345679 }, { "epoch": 2.588410737111206, "grad_norm": 629.4801858140722, "learning_rate": 3.429873692535261e-07, "loss": 0.2735, "step": 12150, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9547169811320755, "success_rate.epoch.env.logic": 0.9158653846153846, "success_rate.epoch.env.math": 0.9747736093143596, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8633193863319386, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716406311382262, "success_rate.epoch.global": 0.9134772356248866, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_in_band": 0.3828125, "tokens_rate.above_band": 0.9940828402366864, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005917159763313609 }, { "epoch": 2.589475926714955, "grad_norm": 147.1787513620685, "learning_rate": 3.4295647020878346e-07, "loss": 0.257, "step": 12155, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9159663865546218, "success_rate.epoch.env.math": 0.9748873148744366, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8633828996282528, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716814000208323, "success_rate.epoch.global": 0.9136338946224878, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956018518518519, "tokens_p.mean_in_band": 0.7633928571428571, "tokens_rate.above_band": 0.9830097087378641, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01699029126213592 }, { "epoch": 2.590541116318705, "grad_norm": 55.17296432064818, "learning_rate": 3.4292558358524284e-07, "loss": 0.2617, "step": 12160, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8633540372670807, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9161676646706587, "success_rate.epoch.env.math": 0.9749357326478149, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8636995827538247, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717328890221946, "success_rate.epoch.global": 0.9137899873486355, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9933712121212122, "tokens_p.mean_in_band": 0.7890625, "tokens_rate.above_band": 0.9801980198019802, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019801980198019802 }, { "epoch": 2.591606305922454, "grad_norm": 60.10127876467344, "learning_rate": 3.4289470940541427e-07, "loss": 0.1999, "step": 12165, "success_rate.epoch.env.abd": 0.984, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.916267942583732, "success_rate.epoch.env.math": 0.9749679075738126, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8640776699029126, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718559829844925, "success_rate.epoch.global": 0.913945516868122, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9950132978723404, "tokens_p.mean_in_band": 0.69765625, "tokens_rate.above_band": 0.9740932642487047, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025906735751295335 }, { "epoch": 2.592671495526204, "grad_norm": 98.1968561237007, "learning_rate": 3.428638476917985e-07, "loss": 0.1965, "step": 12170, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9668508287292817, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.9666666666666667, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9164677804295943, "success_rate.epoch.env.math": 0.9750320102432779, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8642032332563511, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8719139294346107, "success_rate.epoch.global": 0.9141004862236629, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966755319148937, "tokens_p.mean_in_band": 0.658203125, "tokens_rate.above_band": 0.9832635983263598, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016736401673640166 }, { "epoch": 2.593736685129953, "grad_norm": 66.34300862681035, "learning_rate": 3.428329984668874e-07, "loss": 0.224, "step": 12175, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9548872180451128, "success_rate.epoch.env.logic": 0.9165673420738975, "success_rate.epoch.env.math": 0.975095785440613, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8643911439114391, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.872060170763485, "success_rate.epoch.global": 0.9142548984360956, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991861979166666, "tokens_p.mean_in_band": 0.77734375, "tokens_rate.above_band": 0.9896907216494846, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010309278350515464 }, { "epoch": 2.5948018747337027, "grad_norm": 20.708303786492305, "learning_rate": 3.428021617531637e-07, "loss": 0.1319, "step": 12180, "success_rate.epoch.env.abd": 0.9840637450199203, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.967032967032967, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9167657550535078, "success_rate.epoch.env.math": 0.9751275510204082, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8647031753336402, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8721248227372826, "success_rate.epoch.global": 0.9144087565045756, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9942708333333333, "tokens_p.mean_in_band": 0.80625, "tokens_rate.above_band": 0.9836065573770492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01639344262295082 }, { "epoch": 2.595867064337452, "grad_norm": 462.0097831592874, "learning_rate": 3.4277133757310093e-07, "loss": 0.2027, "step": 12185, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, "success_rate.epoch.env.agentgym:textcraft": 0.9736842105263158, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9167657550535078, "success_rate.epoch.env.math": 0.9745547073791349, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8640330730362885, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720339537272437, "success_rate.epoch.global": 0.9140247178936056, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9949087078651685, "tokens_p.mean_in_band": 0.5501302083333334, "tokens_rate.above_band": 0.9368421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06315789473684211 }, { "epoch": 2.5969322539412016, "grad_norm": 163.897267507085, "learning_rate": 3.427405259491634e-07, "loss": 0.2552, "step": 12190, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9156769596199525, "success_rate.epoch.env.math": 0.974587039390089, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.864406779661017, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720332270940422, "success_rate.epoch.global": 0.9139996424101555, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952980324074074, "tokens_p.mean_in_band": 0.5223817567567568, "tokens_rate.above_band": 0.9589345172031076, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041065482796892344 }, { "epoch": 2.597997443544951, "grad_norm": 52.529101453778985, "learning_rate": 3.427097269038067e-07, "loss": 0.1746, "step": 12195, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9672131147540983, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9159763313609467, "success_rate.epoch.env.math": 0.9746353836398225, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8646547782350251, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8720873829635636, "success_rate.epoch.global": 0.91415313225058, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9916237113402062, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9797979797979798, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020202020202020204 }, { "epoch": 2.5990626331487006, "grad_norm": 151.37354831033662, "learning_rate": 3.426789404594767e-07, "loss": 0.2205, "step": 12200, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9159763313609467, "success_rate.epoch.env.math": 0.9746995572422518, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8649018712916476, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8721375825495595, "success_rate.epoch.global": 0.9143060751826118, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997043918918919, "tokens_p.mean_in_band": 0.7552083333333334, "tokens_rate.above_band": 0.9801324503311258, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019867549668874173 }, { "epoch": 2.60012782275245, "grad_norm": 89.29630922816644, "learning_rate": 3.426481666386104e-07, "loss": 0.2014, "step": 12205, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9552238805970149, "success_rate.epoch.env.logic": 0.9161747343565525, "success_rate.epoch.env.math": 0.9747315224257739, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8651480637813211, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8722121757050393, "success_rate.epoch.global": 0.9144584741241331, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0005926042983566, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9987373737373737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0012626262626262627 }, { "epoch": 2.6011930123561995, "grad_norm": 196.83021355492997, "learning_rate": 3.426174054636356e-07, "loss": 0.3583, "step": 12210, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9516728624535316, "success_rate.epoch.env.logic": 0.9161747343565525, "success_rate.epoch.env.math": 0.9748110831234257, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8644222020018199, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.871830601230009, "success_rate.epoch.global": 0.9140777560802414, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.987717048145225, "tokens_p.mean_below_band": 2.8312206268310547e-07, "tokens_p.mean_in_band": 0.4772267206477733, "tokens_rate.above_band": 0.7186613726602382, "tokens_rate.below_band": 0.0011344299489506524, "tokens_rate.in_band": 0.2802041973908111 }, { "epoch": 2.6022582019599487, "grad_norm": 111.40438070189366, "learning_rate": 3.425866569569708e-07, "loss": 0.2828, "step": 12215, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9166666666666666, "success_rate.epoch.env.math": 0.9748427672955975, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8640909090909091, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718643569545267, "success_rate.epoch.global": 0.9140528087896509, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983733733733734, "tokens_p.mean_in_band": 0.5924479166666666, "tokens_rate.above_band": 0.9940298507462687, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005970149253731343 }, { "epoch": 2.6033233915636984, "grad_norm": 96.6807663742799, "learning_rate": 3.4255592114102526e-07, "loss": 0.1794, "step": 12220, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9168618266978923, "success_rate.epoch.env.math": 0.9748743718592965, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8639455782312925, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.871871760021373, "success_rate.epoch.global": 0.9140279497611887, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966193931398417, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.994750656167979, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005249343832020997 }, { "epoch": 2.6043885811674476, "grad_norm": 143.38548451088715, "learning_rate": 3.42525198038199e-07, "loss": 0.2257, "step": 12225, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9169590643274854, "success_rate.epoch.env.math": 0.974937343358396, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8641919420552286, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715643685818047, "success_rate.epoch.global": 0.9140031785272824, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921328671328671, "tokens_p.mean_in_band": 0.714453125, "tokens_rate.above_band": 0.8773006134969326, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12269938650306748 }, { "epoch": 2.6054537707711973, "grad_norm": 43.62137269560941, "learning_rate": 3.4249448767088283e-07, "loss": 0.112, "step": 12230, "success_rate.epoch.env.abd": 0.9841897233201581, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.9743589743589743, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9170560747663551, "success_rate.epoch.env.math": 0.9749530369442705, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8642309427153811, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715781599167953, "success_rate.epoch.global": 0.9139784946236559, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954166666666666, "tokens_p.mean_in_band": 0.6376953125, "tokens_rate.above_band": 0.974025974025974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025974025974025976 }, { "epoch": 2.606518960374947, "grad_norm": 95.49498414806867, "learning_rate": 3.4246379006145827e-07, "loss": 0.2985, "step": 12235, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.9675675675675676, "success_rate.epoch.env.agentgym:textcraft": 0.975, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9159859976662778, "success_rate.epoch.env.math": 0.9749843652282677, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8631863186318632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.871452696091814, "success_rate.epoch.global": 0.9134260073904628, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.6799999999999999, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9967912946428571, "tokens_p.mean_in_band": 0.5739889705882353, "tokens_rate.above_band": 0.9294605809128631, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07053941908713693 }, { "epoch": 2.6075841499786963, "grad_norm": 87.8559922223753, "learning_rate": 3.4243310523229753e-07, "loss": 0.2659, "step": 12240, "success_rate.epoch.env.abd": 0.984251968503937, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9518518518518518, "success_rate.epoch.env.logic": 0.9140534262485482, "success_rate.epoch.env.math": 0.9750467872738615, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8631863186318632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8713539665137895, "success_rate.epoch.global": 0.9132267697171965, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0005063657407407, "tokens_p.mean_in_band": 0.41271551724137934, "tokens_rate.above_band": 0.9781132075471698, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02188679245283019 }, { "epoch": 2.6086493395824455, "grad_norm": 129.2130996445585, "learning_rate": 3.424024332057634e-07, "loss": 0.4608, "step": 12245, "success_rate.epoch.env.abd": 0.984313725490196, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9520295202952029, "success_rate.epoch.env.logic": 0.91415313225058, "success_rate.epoch.env.math": 0.9751088985687617, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8633093525179856, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714016280240319, "success_rate.epoch.global": 0.9133637320238512, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989388794567062, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9949324324324325, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005067567567567568 }, { "epoch": 2.609714529186195, "grad_norm": 79.208831291153, "learning_rate": 3.423717740042095e-07, "loss": 0.2418, "step": 12250, "success_rate.epoch.env.abd": 0.984313725490196, "success_rate.epoch.env.agentgym:alfworld": 0.8641975308641975, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9520295202952029, "success_rate.epoch.env.logic": 0.9143518518518519, "success_rate.epoch.env.math": 0.9751707014276847, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8635547576301615, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8714476214397018, "success_rate.epoch.global": 0.913515406162465, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946428571428572, "tokens_p.mean_in_band": 0.8072916666666666, "tokens_rate.above_band": 0.9668508287292817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03314917127071823 }, { "epoch": 2.610779718789945, "grad_norm": 38.64000966474176, "learning_rate": 3.4234112764998004e-07, "loss": 0.1079, "step": 12255, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8650306748466258, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9522058823529411, "success_rate.epoch.env.logic": 0.9144508670520232, "success_rate.epoch.env.math": 0.9752014879107254, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8637992831541219, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715789948721693, "success_rate.epoch.global": 0.9136665501572877, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996522257551669, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.611844908393694, "grad_norm": 453.03288817797215, "learning_rate": 3.4231049416540986e-07, "loss": 0.3774, "step": 12260, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8650306748466258, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9145496535796767, "success_rate.epoch.env.math": 0.9752628324056896, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8635957066189625, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715909607372119, "success_rate.epoch.global": 0.9136427076064201, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963235294117647, "tokens_p.mean_in_band": 0.3515625, "tokens_rate.above_band": 0.9714285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02857142857142857 }, { "epoch": 2.6129100979974433, "grad_norm": 125.18524775678662, "learning_rate": 3.422798735728244e-07, "loss": 0.2371, "step": 12265, "success_rate.epoch.env.abd": 0.984375, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.975609756097561, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9147465437788018, "success_rate.epoch.env.math": 0.9753238741517581, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8633318445734703, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716652383362719, "success_rate.epoch.global": 0.9136189481017067, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966836734693878, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9760956175298805, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02390438247011952 }, { "epoch": 2.613975287601193, "grad_norm": 93.96632888151346, "learning_rate": 3.422492658945397e-07, "loss": 0.2015, "step": 12270, "success_rate.epoch.env.abd": 0.9844357976653697, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9147465437788018, "success_rate.epoch.env.math": 0.9753846153846154, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.863514719000892, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717613871033055, "success_rate.epoch.global": 0.913769123783032, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983212809917356, "tokens_p.mean_in_band": 0.73828125, "tokens_rate.above_band": 0.983739837398374, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016260162601626018 }, { "epoch": 2.6150404772049427, "grad_norm": 120.6420295419577, "learning_rate": 3.422186711528625e-07, "loss": 0.1961, "step": 12275, "success_rate.epoch.env.abd": 0.9844961240310077, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9147465437788018, "success_rate.epoch.env.math": 0.9754299754299754, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8638790035587188, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718041117368348, "success_rate.epoch.global": 0.9139187782020132, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923537234042553, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.9690721649484536, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030927835051546393 }, { "epoch": 2.616105666808692, "grad_norm": 82.01705876916566, "learning_rate": 3.4218808937009e-07, "loss": 0.3291, "step": 12280, "success_rate.epoch.env.abd": 0.9844961240310077, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9138920780711826, "success_rate.epoch.env.math": 0.9755052051439069, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.864, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717442717775253, "success_rate.epoch.global": 0.9138946638946639, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982114467408585, "tokens_p.mean_in_band": 0.48662109375, "tokens_rate.above_band": 0.9402092675635276, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059790732436472344 }, { "epoch": 2.617170856412441, "grad_norm": 180.05305984008223, "learning_rate": 3.4215752056851e-07, "loss": 0.2132, "step": 12285, "success_rate.epoch.env.abd": 0.9845559845559846, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9140893470790378, "success_rate.epoch.env.math": 0.9755501222493888, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8642413487133984, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717936713549538, "success_rate.epoch.global": 0.914043583535109, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969135802469136, "tokens_p.mean_in_band": 0.59296875, "tokens_rate.above_band": 0.9418604651162791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05813953488372093 }, { "epoch": 2.618236046016191, "grad_norm": 38.28839485192977, "learning_rate": 3.4212696477040066e-07, "loss": 0.2753, "step": 12290, "success_rate.epoch.env.abd": 0.9845559845559846, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.9140893470790378, "success_rate.epoch.env.math": 0.975594874923734, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.863716814159292, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717658541063021, "success_rate.epoch.global": 0.9138466850828729, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954308093994778, "tokens_p.mean_in_band": 0.5618489583333334, "tokens_rate.above_band": 0.9845758354755784, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015424164524421594 }, { "epoch": 2.6193012356199405, "grad_norm": 74.68936543167256, "learning_rate": 3.420964219980311e-07, "loss": 0.1572, "step": 12295, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9679144385026738, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.9144811858608894, "success_rate.epoch.env.math": 0.975594874923734, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8640176600441501, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718342254450394, "success_rate.epoch.global": 0.9139951740779042, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9922680412371134, "tokens_p.mean_in_band": 0.7869318181818182, "tokens_rate.above_band": 0.8981481481481481, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10185185185185185 }, { "epoch": 2.62036642522369, "grad_norm": 199.40942827091234, "learning_rate": 3.4206589227366043e-07, "loss": 0.2017, "step": 12300, "success_rate.epoch.env.abd": 0.9846743295019157, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.9145785876993167, "success_rate.epoch.env.math": 0.9750456482045039, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8641975308641975, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718303762365012, "success_rate.epoch.global": 0.9139710942876806, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915644171779141, "tokens_p.mean_in_band": 0.7388392857142857, "tokens_rate.above_band": 0.9588235294117647, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041176470588235294 }, { "epoch": 2.621431614827439, "grad_norm": 161.56315644597106, "learning_rate": 3.420353756195386e-07, "loss": 0.178, "step": 12305, "success_rate.epoch.env.abd": 0.9847908745247148, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9525547445255474, "success_rate.epoch.env.logic": 0.9146757679180887, "success_rate.epoch.env.math": 0.9750911300121506, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8644366197183099, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718756758640768, "success_rate.epoch.global": 0.9141188594984542, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9954954954954955, "tokens_p.mean_in_band": 0.83671875, "tokens_rate.above_band": 0.9568965517241379, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04310344827586207 }, { "epoch": 2.6224968044311887, "grad_norm": 1129.3170743467604, "learning_rate": 3.4200487205790593e-07, "loss": 0.3213, "step": 12310, "success_rate.epoch.env.abd": 0.9847908745247148, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9528985507246377, "success_rate.epoch.env.logic": 0.9147727272727273, "success_rate.epoch.env.math": 0.975121359223301, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8642355008787346, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8719155611182853, "success_rate.epoch.global": 0.9140946502057613, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984206989247312, "tokens_p.mean_in_band": 0.6102764423076923, "tokens_rate.above_band": 0.9862142099681867, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013785790031813362 }, { "epoch": 2.6235619940349384, "grad_norm": 20.309635711448852, "learning_rate": 3.4197438161099324e-07, "loss": 0.125, "step": 12315, "success_rate.epoch.env.abd": 0.9849056603773585, "success_rate.epoch.env.agentgym:alfworld": 0.8658536585365854, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9149659863945578, "success_rate.epoch.env.math": 0.9751665657177468, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8643546971027217, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716457775476291, "success_rate.epoch.global": 0.9140705237932215, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9923189769707705, "tokens_p.mean_in_band": 0.6464285714285715, "tokens_rate.above_band": 0.9416180150125104, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058381984987489574 }, { "epoch": 2.6246271836386876, "grad_norm": 50.60032986386894, "learning_rate": 3.4194390430102167e-07, "loss": 0.3125, "step": 12320, "success_rate.epoch.env.abd": 0.9850187265917603, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9149659863945578, "success_rate.epoch.env.math": 0.975211608222491, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8641542506573181, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717158384934313, "success_rate.epoch.global": 0.9140464798359536, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960411051212938, "tokens_p.mean_in_band": 0.564453125, "tokens_rate.above_band": 0.9867021276595744, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013297872340425532 }, { "epoch": 2.6256923732424373, "grad_norm": 552.7538325052278, "learning_rate": 3.419134401502028e-07, "loss": 0.3424, "step": 12325, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8674698795180723, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9149659863945578, "success_rate.epoch.env.math": 0.9752714113389626, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8642732049036778, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718202411123095, "success_rate.epoch.global": 0.9141931081542136, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982775590551181, "tokens_p.mean_in_band": 0.673828125, "tokens_rate.above_band": 0.9883268482490273, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011673151750972763 }, { "epoch": 2.6267575628461866, "grad_norm": 78.88062688923914, "learning_rate": 3.418829891807389e-07, "loss": 0.1106, "step": 12330, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.9682539682539683, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9494584837545126, "success_rate.epoch.env.logic": 0.9150622876557192, "success_rate.epoch.env.math": 0.9747140276941602, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8641328090869376, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718377061428039, "success_rate.epoch.global": 0.9139986376021798, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975961538461539, "tokens_p.mean_in_band": 0.6390625, "tokens_rate.above_band": 0.9676375404530745, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032362459546925564 }, { "epoch": 2.6278227524499362, "grad_norm": 326.9640938071797, "learning_rate": 3.4185255141482226e-07, "loss": 0.3564, "step": 12335, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.968421052631579, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9150622876557192, "success_rate.epoch.env.math": 0.9741741741741742, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8643698211949411, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718418922319767, "success_rate.epoch.global": 0.9139748384903095, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975833333333334, "tokens_p.mean_in_band": 0.52578125, "tokens_rate.above_band": 0.9868421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013157894736842105 }, { "epoch": 2.6288879420536855, "grad_norm": 191.830844701382, "learning_rate": 3.4182212687463575e-07, "loss": 0.2215, "step": 12340, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.967741935483871, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9150622876557192, "success_rate.epoch.env.math": 0.9742051589682064, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8647826086956522, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8718972656044968, "success_rate.epoch.global": 0.9141208418194161, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960106382978723, "tokens_p.mean_in_band": 0.6927083333333334, "tokens_rate.above_band": 0.9591836734693877, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04081632653061224 }, { "epoch": 2.629953131657435, "grad_norm": 0.0, "learning_rate": 3.4179171558235253e-07, "loss": 0.3357, "step": 12345, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9142212189616253, "success_rate.epoch.env.math": 0.9742206235011991, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8650173611111112, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8719516031982392, "success_rate.epoch.global": 0.9140969162995595, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995336859688196, "tokens_p.mean_in_band": 0.71703125, "tokens_rate.above_band": 0.9472573839662447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.052742616033755275 }, { "epoch": 2.6310183212611844, "grad_norm": 101.32455904266345, "learning_rate": 3.417613175601361e-07, "loss": 0.1704, "step": 12350, "success_rate.epoch.env.abd": 0.9851851851851852, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.9685863874345549, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9142212189616253, "success_rate.epoch.env.math": 0.974251497005988, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8650519031141869, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8719575500625905, "success_rate.epoch.global": 0.9140730717185386, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9902173913043478, "tokens_p.mean_in_band": 0.6592881944444444, "tokens_rate.above_band": 0.9274193548387096, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07258064516129033 }, { "epoch": 2.632083510864934, "grad_norm": 35.22862126377775, "learning_rate": 3.417309328301404e-07, "loss": 0.177, "step": 12355, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9142212189616253, "success_rate.epoch.env.math": 0.9743130227001194, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.8652849740932642, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.87200417519459, "success_rate.epoch.global": 0.9142181695373185, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996875, "tokens_p.mean_in_band": 0.82421875, "tokens_rate.above_band": 0.9815950920245399, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018404907975460124 }, { "epoch": 2.6331487004686833, "grad_norm": 379.97062484076656, "learning_rate": 3.4170056141450957e-07, "loss": 0.427, "step": 12360, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8682634730538922, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9132882882882883, "success_rate.epoch.env.math": 0.9743283582089552, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8656330749354005, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716283064026846, "success_rate.epoch.global": 0.914025623735671, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9956098942598187, "tokens_p.mean_in_band": 0.5803052325581395, "tokens_rate.above_band": 0.9390070921985816, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06099290780141844 }, { "epoch": 2.634213890072433, "grad_norm": 76.11846409403248, "learning_rate": 3.416702033353781e-07, "loss": 0.2087, "step": 12365, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8690476190476191, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9134831460674158, "success_rate.epoch.env.math": 0.9743895175699822, "success_rate.epoch.env.sat": 0.11764705882352941, "success_rate.epoch.env.science": 0.8658064516129033, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8717386282037196, "success_rate.epoch.global": 0.9141703130259172, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968377976190477, "tokens_p.mean_in_band": 0.884765625, "tokens_rate.above_band": 0.9882352941176471, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011764705882352941 }, { "epoch": 2.6352790796761822, "grad_norm": 27.976698684638354, "learning_rate": 3.416398586148707e-07, "loss": 0.1146, "step": 12370, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8698224852071006, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9498207885304659, "success_rate.epoch.env.logic": 0.9135802469135802, "success_rate.epoch.env.math": 0.974435196195006, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.8660369257191928, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8715374259490964, "success_rate.epoch.global": 0.9141465053763441, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963417658730159, "tokens_p.mean_in_band": 0.6346354166666667, "tokens_rate.above_band": 0.9710982658959537, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028901734104046242 }, { "epoch": 2.636344269279932, "grad_norm": 88.06362553512136, "learning_rate": 3.4160952727510236e-07, "loss": 0.4213, "step": 12375, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.95, "success_rate.epoch.env.logic": 0.9136771300448431, "success_rate.epoch.env.math": 0.974435196195006, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.8660102739726028, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716297162164803, "success_rate.epoch.global": 0.9141227775914122, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972644658753709, "tokens_p.mean_in_band": 0.53984375, "tokens_rate.above_band": 0.9853801169590644, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014619883040935672 }, { "epoch": 2.637409458883681, "grad_norm": 1299.8528138523136, "learning_rate": 3.4157920933817844e-07, "loss": 0.5056, "step": 12380, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.96875, "success_rate.epoch.env.ded": 0.9501779359430605, "success_rate.epoch.env.logic": 0.9138702460850112, "success_rate.epoch.env.math": 0.9744807121661722, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.8658119658119658, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716495580159129, "success_rate.epoch.global": 0.9140991292699263, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959677419354839, "tokens_p.mean_in_band": 0.61328125, "tokens_rate.above_band": 0.9850467289719627, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014953271028037384 }, { "epoch": 2.638474648487431, "grad_norm": 42.82145309479514, "learning_rate": 3.415489048261944e-07, "loss": 0.1889, "step": 12385, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8705882352941177, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.950530035335689, "success_rate.epoch.env.logic": 0.9129464285714286, "success_rate.epoch.env.math": 0.9745260663507109, "success_rate.epoch.env.sat": 0.11428571428571428, "success_rate.epoch.env.science": 0.8659265584970111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8716982124200583, "success_rate.epoch.global": 0.9140755600133734, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996821530418251, "tokens_p.mean_in_band": 0.6125, "tokens_rate.above_band": 0.9905838041431262, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009416195856873822 }, { "epoch": 2.63953983809118, "grad_norm": 354.2634797140363, "learning_rate": 3.4151861376123587e-07, "loss": 0.3427, "step": 12390, "success_rate.epoch.env.abd": 0.985239852398524, "success_rate.epoch.env.agentgym:alfworld": 0.8713450292397661, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.9130434782608695, "success_rate.epoch.env.math": 0.9745411486086442, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.86615515771526, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8740661994528959, "success_rate.epoch.global": 0.914218958611482, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996171516079633, "tokens_p.mean_in_band": 0.70703125, "tokens_rate.above_band": 0.9994897959183674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0005102040816326531 }, { "epoch": 2.6406050276949298, "grad_norm": 260.97235863946565, "learning_rate": 3.414883361653788e-07, "loss": 0.2062, "step": 12395, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.9123196448390677, "success_rate.epoch.env.math": 0.9745562130177515, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8663260962111536, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.874090238429967, "success_rate.epoch.global": 0.9141952682439187, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984756097560976, "tokens_p.mean_in_band": 0.7408854166666666, "tokens_rate.above_band": 0.9933920704845814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006607929515418502 }, { "epoch": 2.641670217298679, "grad_norm": 122.82801333251628, "learning_rate": 3.414580720606894e-07, "loss": 0.2721, "step": 12400, "success_rate.epoch.env.abd": 0.9853479853479854, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9508771929824561, "success_rate.epoch.env.logic": 0.912707182320442, "success_rate.epoch.env.math": 0.9746012994683992, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8664398128455976, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8741448028179115, "success_rate.epoch.global": 0.9143379906852961, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986772486772487, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9947368421052631, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005263157894736842 }, { "epoch": 2.6427354069024287, "grad_norm": 106.00914502389531, "learning_rate": 3.4142782146922374e-07, "loss": 0.3099, "step": 12405, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9689119170984456, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.951048951048951, "success_rate.epoch.env.logic": 0.91280353200883, "success_rate.epoch.env.math": 0.9746462264150944, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8666100254885302, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8742083154496375, "success_rate.epoch.global": 0.9144802391232149, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949448529411765, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9927007299270073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0072992700729927005 }, { "epoch": 2.643800596506178, "grad_norm": 95.61160162625873, "learning_rate": 3.413975844130284e-07, "loss": 0.2611, "step": 12410, "success_rate.epoch.env.abd": 0.9854014598540146, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.951048951048951, "success_rate.epoch.env.logic": 0.9128996692392503, "success_rate.epoch.env.math": 0.974676089517079, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8669491525423729, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8742651677438428, "success_rate.epoch.global": 0.9146220159151194, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9950331125827815, "tokens_p.mean_in_band": 0.845703125, "tokens_rate.above_band": 0.9741935483870968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025806451612903226 }, { "epoch": 2.6448657861099276, "grad_norm": 52.318647459045245, "learning_rate": 3.4136736091413977e-07, "loss": 0.1276, "step": 12415, "success_rate.epoch.env.abd": 0.9854545454545455, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9130913091309131, "success_rate.epoch.env.math": 0.9747207524985303, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8671180702496826, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.874322337500854, "success_rate.epoch.global": 0.9147633234028467, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986349453978159, "tokens_p.mean_in_band": 0.73828125, "tokens_rate.above_band": 0.9968895800933126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003110419906687403 }, { "epoch": 2.645930975713677, "grad_norm": 223.657351330841, "learning_rate": 3.413371509945847e-07, "loss": 0.2661, "step": 12420, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9131868131868132, "success_rate.epoch.env.math": 0.9747800586510263, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8672865595942519, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8743709379333583, "success_rate.epoch.global": 0.9149041639127561, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.6469961653174265, "grad_norm": 275.558977706429, "learning_rate": 3.413069546763799e-07, "loss": 0.3893, "step": 12425, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.872093023255814, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9133771929824561, "success_rate.epoch.env.math": 0.974824355971897, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8675664276676508, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8744177147688051, "success_rate.epoch.global": 0.9150445397558562, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9876373626373627, "tokens_p.mean_in_band": 0.7940340909090909, "tokens_rate.above_band": 0.8921568627450981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10784313725490197 }, { "epoch": 2.6480613549211762, "grad_norm": 56.73580731328113, "learning_rate": 3.412767719815321e-07, "loss": 0.3102, "step": 12430, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9761904761904762, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.912472647702407, "success_rate.epoch.env.math": 0.9748684979544127, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8677894736842106, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.873901500458314, "success_rate.epoch.global": 0.9148550724637681, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981617647058824, "tokens_p.mean_in_band": 0.5625651041666667, "tokens_rate.above_band": 0.9645390070921985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03546099290780142 }, { "epoch": 2.6491265445249255, "grad_norm": 148.82700566987907, "learning_rate": 3.4124660293203834e-07, "loss": 0.229, "step": 12435, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8670520231213873, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9114754098360656, "success_rate.epoch.env.math": 0.9749417249417249, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8679562657695542, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8738829996458005, "success_rate.epoch.global": 0.9148306478132193, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960648148148148, "tokens_p.mean_in_band": 0.3863146551724138, "tokens_rate.above_band": 0.9588068181818182, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041193181818181816 }, { "epoch": 2.6501917341286747, "grad_norm": 24.734506480244168, "learning_rate": 3.4121644754988565e-07, "loss": 0.2465, "step": 12440, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.867816091954023, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9114754098360656, "success_rate.epoch.env.math": 0.9744186046511628, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8677581863979849, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8739011682859619, "success_rate.epoch.global": 0.9146421536441235, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989754098360656, "tokens_p.mean_in_band": 0.4957932692307692, "tokens_rate.above_band": 0.9704545454545455, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029545454545454545 }, { "epoch": 2.6512569237324244, "grad_norm": 170.44071539248512, "learning_rate": 3.4118630585705095e-07, "loss": 0.1713, "step": 12445, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.867816091954023, "success_rate.epoch.env.agentgym:sciworld": 0.9693877551020408, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9116684841875682, "success_rate.epoch.env.math": 0.9744483159117305, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.867197318810222, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.873870433560899, "success_rate.epoch.global": 0.9144542772861357, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9985074626865672, "tokens_p.mean_in_band": 0.4775390625, "tokens_rate.above_band": 0.9882005899705014, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011799410029498525 }, { "epoch": 2.652322113336174, "grad_norm": 115.991140567978, "learning_rate": 3.411561778755014e-07, "loss": 0.2379, "step": 12450, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8693181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.9695431472081218, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9117647058823529, "success_rate.epoch.env.math": 0.9744927536231884, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8673640167364016, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.8740490553156864, "success_rate.epoch.global": 0.9145942408376964, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9994095816464238, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9986522911051213, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0013477088948787063 }, { "epoch": 2.6533873029399233, "grad_norm": 77.38937991969678, "learning_rate": 3.4112606362719396e-07, "loss": 0.1455, "step": 12455, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8693181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9512195121951219, "success_rate.epoch.env.logic": 0.9120521172638436, "success_rate.epoch.env.math": 0.9745370370370371, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8675302966987045, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.874108309610822, "success_rate.epoch.global": 0.914733747141457, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973684210526316, "tokens_p.mean_in_band": 0.8489583333333334, "tokens_rate.above_band": 0.9693877551020408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030612244897959183 }, { "epoch": 2.6544524925436725, "grad_norm": 267.3205182538426, "learning_rate": 3.4109596313407576e-07, "loss": 0.2356, "step": 12460, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8700564971751412, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.9122426868905742, "success_rate.epoch.env.math": 0.9745370370370371, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.8678065054211843, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8713923523744523, "success_rate.epoch.global": 0.9147097195042401, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968581081081082, "tokens_p.mean_in_band": 0.6544744318181818, "tokens_rate.above_band": 0.965553235908142, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03444676409185804 }, { "epoch": 2.6555176821474222, "grad_norm": 74.44973028954672, "learning_rate": 3.4106587641808375e-07, "loss": 0.1964, "step": 12465, "success_rate.epoch.env.abd": 0.9855595667870036, "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.9122426868905742, "success_rate.epoch.env.math": 0.974581166955517, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8676103247293921, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8711084010618996, "success_rate.epoch.global": 0.9145229566916314, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979416167664671, "tokens_p.mean_in_band": 0.6551339285714286, "tokens_rate.above_band": 0.9597701149425287, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040229885057471264 }, { "epoch": 2.656582871751172, "grad_norm": 146.02242392499596, "learning_rate": 3.4103580350114494e-07, "loss": 0.1527, "step": 12470, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9513888888888888, "success_rate.epoch.env.logic": 0.9125269978401728, "success_rate.epoch.env.math": 0.9746105020196192, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8674147963424771, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8711238612101858, "success_rate.epoch.global": 0.9144993498049415, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.5, "tokens_rate.above_band": 0.9574468085106383, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0425531914893617 }, { "epoch": 2.657648061354921, "grad_norm": 80.72433270935532, "learning_rate": 3.410057444051762e-07, "loss": 0.2261, "step": 12475, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8707865168539326, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9127155172413793, "success_rate.epoch.env.math": 0.9746835443037974, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8675249169435216, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8711729418304717, "success_rate.epoch.global": 0.9146381045115223, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982726130653267, "tokens_p.mean_in_band": 0.8580729166666666, "tokens_rate.above_band": 0.9851485148514851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01485148514851485 }, { "epoch": 2.6587132509586704, "grad_norm": 578.3522911448025, "learning_rate": 3.409756991520845e-07, "loss": 0.3023, "step": 12480, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8715083798882681, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9128094725511302, "success_rate.epoch.env.math": 0.9747126436781609, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.867854183927092, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8712796858942916, "success_rate.epoch.global": 0.9147764095917045, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.994894801980198, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9619047619047619, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0380952380952381 }, { "epoch": 2.65977844056242, "grad_norm": 54.715151789937785, "learning_rate": 3.4094566776376654e-07, "loss": 0.2974, "step": 12485, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9129032258064517, "success_rate.epoch.env.math": 0.9742120343839542, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8679635761589404, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8713313818089375, "success_rate.epoch.global": 0.9147525072791977, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988290398126464, "tokens_p.mean_in_band": 0.4895833333333333, "tokens_rate.above_band": 0.9930232558139535, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0069767441860465115 }, { "epoch": 2.6608436301661698, "grad_norm": 142.77648136138615, "learning_rate": 3.40915650262109e-07, "loss": 0.1973, "step": 12490, "success_rate.epoch.env.abd": 0.9856115107913669, "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9515570934256056, "success_rate.epoch.env.logic": 0.9129032258064517, "success_rate.epoch.env.math": 0.9742857142857143, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8678232135481206, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8713253197444775, "success_rate.epoch.global": 0.9147286821705426, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.990301724137931, "tokens_p.mean_in_band": 0.803515625, "tokens_rate.above_band": 0.9206349206349206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07936507936507936 }, { "epoch": 2.661908819769919, "grad_norm": 139.77275910617925, "learning_rate": 3.408856466689884e-07, "loss": 0.5987, "step": 12495, "success_rate.epoch.env.abd": 0.985663082437276, "success_rate.epoch.env.agentgym:alfworld": 0.8722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9517241379310345, "success_rate.epoch.env.logic": 0.9121114683815649, "success_rate.epoch.env.math": 0.9743150684931506, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8679867986798679, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8712907559322627, "success_rate.epoch.global": 0.9147049338922928, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990666482300885, "tokens_p.mean_in_band": 0.5896935096153846, "tokens_rate.above_band": 0.9720430107526882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02795698924731183 }, { "epoch": 2.6629740093736682, "grad_norm": 75.85065896581979, "learning_rate": 3.4085565700627113e-07, "loss": 0.2049, "step": 12500, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9517241379310345, "success_rate.epoch.env.logic": 0.9122994652406418, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8677379480840544, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8713580477485764, "success_rate.epoch.global": 0.9146812620734063, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992433414043583, "tokens_p.mean_in_band": 0.3776041666666667, "tokens_rate.above_band": 0.9927884615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007211538461538462 }, { "epoch": 2.664039198977418, "grad_norm": 314.7658792713378, "learning_rate": 3.408256812958135e-07, "loss": 0.245, "step": 12505, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9518900343642611, "success_rate.epoch.env.logic": 0.9123931623931624, "success_rate.epoch.env.math": 0.9744172825469016, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8679555738379268, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8714067320692628, "success_rate.epoch.global": 0.9148183863709418, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987726586102719, "tokens_p.mean_in_band": 0.798828125, "tokens_rate.above_band": 0.9880597014925373, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011940298507462687 }, { "epoch": 2.6651043885811676, "grad_norm": 22.935712991378168, "learning_rate": 3.407957195594615e-07, "loss": 0.0616, "step": 12510, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8729281767955801, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.952054794520548, "success_rate.epoch.env.logic": 0.9124866595517609, "success_rate.epoch.env.math": 0.9744753261486103, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8681183237469187, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8714549038571228, "success_rate.epoch.global": 0.9149550706033376, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986640334572491, "tokens_p.mean_in_band": 0.8079427083333334, "tokens_rate.above_band": 0.9889705882352942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011029411764705883 }, { "epoch": 2.666169578184917, "grad_norm": 76.3643800223536, "learning_rate": 3.407657718190511e-07, "loss": 0.2555, "step": 12515, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8736263736263736, "success_rate.epoch.env.agentgym:sciworld": 0.9698492462311558, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9696969696969697, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9116080937167199, "success_rate.epoch.env.math": 0.9745042492917847, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8683347005742412, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8714756826280164, "success_rate.epoch.global": 0.9149311118231336, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959503239740821, "tokens_p.mean_in_band": 0.7277644230769231, "tokens_rate.above_band": 0.9907275320970043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009272467902995721 }, { "epoch": 2.6672347677886665, "grad_norm": 194.64564018268754, "learning_rate": 3.40735838096408e-07, "loss": 0.2488, "step": 12520, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8743169398907104, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9116080937167199, "success_rate.epoch.env.math": 0.9745475113122172, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8680868496517821, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8716281599425297, "success_rate.epoch.global": 0.9149072296865003, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993140243902439, "tokens_p.mean_below_band": 4.1443854570388794e-08, "tokens_rate.above_band": 0.9975669099756691, "tokens_rate.below_band": 0.0024330900243309003, "tokens_rate.in_band": 0.0 }, { "epoch": 2.6682999573924158, "grad_norm": 108.75745094931737, "learning_rate": 3.4070591841334763e-07, "loss": 0.2611, "step": 12525, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.8743169398907104, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9117021276595745, "success_rate.epoch.env.math": 0.9746192893401016, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8678936605316974, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.871625671110771, "success_rate.epoch.global": 0.9148834238262535, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9890813253012049, "tokens_p.mean_in_band": 0.6181640625, "tokens_rate.above_band": 0.8736842105263158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12631578947368421 }, { "epoch": 2.6693651469961654, "grad_norm": 0.0, "learning_rate": 3.4067601279167526e-07, "loss": 0.2377, "step": 12530, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9117021276595745, "success_rate.epoch.env.math": 0.9747191011235955, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8680016346546792, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8717066571122042, "success_rate.epoch.global": 0.9150191326530612, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988613360323887, "tokens_p.mean_in_band": 0.880859375, "tokens_rate.above_band": 0.9919678714859438, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008032128514056224 }, { "epoch": 2.6704303365999147, "grad_norm": 218.3030956182925, "learning_rate": 3.406461212531859e-07, "loss": 0.2067, "step": 12535, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9522184300341296, "success_rate.epoch.env.logic": 0.9109225874867445, "success_rate.epoch.env.math": 0.9747616376892877, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.8678090575275398, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8716221497727245, "success_rate.epoch.global": 0.9148360394778733, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8055555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972411717495987, "tokens_p.mean_in_band": 0.46986607142857145, "tokens_rate.above_band": 0.956989247311828, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043010752688172046 }, { "epoch": 2.6714955262036644, "grad_norm": 171.61619979360648, "learning_rate": 3.406162438196643e-07, "loss": 0.1834, "step": 12540, "success_rate.epoch.env.abd": 0.9857651245551602, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9705882352941176, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9110169491525424, "success_rate.epoch.env.math": 0.9747757847533632, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.8680781758957655, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8713613986499823, "success_rate.epoch.global": 0.9148124602670057, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972363945578231, "tokens_p.mean_in_band": 0.6227678571428571, "tokens_rate.above_band": 0.9767441860465116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023255813953488372 }, { "epoch": 2.6725607158074136, "grad_norm": 135.820816695306, "learning_rate": 3.4058638051288497e-07, "loss": 0.1628, "step": 12545, "success_rate.epoch.env.abd": 0.9858156028368794, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9714285714285714, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9112050739957717, "success_rate.epoch.env.math": 0.9748181309457191, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8681318681318682, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8711761785613432, "success_rate.epoch.global": 0.91478895588702, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973958333333334, "tokens_p.mean_in_band": 0.7047697368421053, "tokens_rate.above_band": 0.9578713968957872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04212860310421286 }, { "epoch": 2.6736259054111633, "grad_norm": 306.6589585928038, "learning_rate": 3.40556531354612e-07, "loss": 0.2072, "step": 12550, "success_rate.epoch.env.abd": 0.9858657243816255, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9702970297029703, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9714285714285714, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9113924050632911, "success_rate.epoch.env.math": 0.9748603351955307, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8679398618447786, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8711841467954329, "success_rate.epoch.global": 0.914765525982256, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9904983108108109, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9736842105263158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02631578947368421 }, { "epoch": 2.6746910950149125, "grad_norm": 547.4531653007602, "learning_rate": 3.4052669636659917e-07, "loss": 0.4401, "step": 12555, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8763440860215054, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9104320337197049, "success_rate.epoch.env.math": 0.9748743718592965, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8676948051948052, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8712880044419972, "success_rate.epoch.global": 0.914583992407466, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8095238095238094, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999162198391421, "tokens_p.mean_in_band": 0.4446614583333333, "tokens_rate.above_band": 0.9920212765957447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007978723404255319 }, { "epoch": 2.675756284618662, "grad_norm": 88.24170323368735, "learning_rate": 3.4049687557059014e-07, "loss": 0.2505, "step": 12560, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8770053475935828, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.9096638655462185, "success_rate.epoch.env.math": 0.9748743718592965, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8680161943319838, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.87130750285434, "success_rate.epoch.global": 0.9145609602021478, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962022569444444, "tokens_p.mean_in_band": 0.6235119047619048, "tokens_rate.above_band": 0.9320388349514563, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06796116504854369 }, { "epoch": 2.6768214742224115, "grad_norm": 67.47804442850637, "learning_rate": 3.40467068988318e-07, "loss": 0.1269, "step": 12565, "success_rate.epoch.env.abd": 0.9859649122807017, "success_rate.epoch.env.agentgym:alfworld": 0.8776595744680851, "success_rate.epoch.env.agentgym:sciworld": 0.9704433497536946, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.909853249475891, "success_rate.epoch.env.math": 0.9749303621169917, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8673139158576052, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8713299339371077, "success_rate.epoch.global": 0.9143803216650899, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0004937923250565, "tokens_p.mean_in_band": 0.6892361111111112, "tokens_rate.above_band": 0.9899441340782122, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01005586592178771 }, { "epoch": 2.677886663826161, "grad_norm": 92.06360362357528, "learning_rate": 3.404372766415057e-07, "loss": 0.3672, "step": 12570, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8783068783068783, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.909853249475891, "success_rate.epoch.env.math": 0.9749582637729549, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.867124394184168, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.8713917196730193, "success_rate.epoch.global": 0.9143441977641317, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9979103343465046, "tokens_p.mean_in_band": 0.1728515625, "tokens_rate.above_band": 0.996969696969697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0030303030303030303 }, { "epoch": 2.6789518534299104, "grad_norm": 82.6772385835561, "learning_rate": 3.4040749855186557e-07, "loss": 0.3534, "step": 12575, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8789473684210526, "success_rate.epoch.env.agentgym:sciworld": 0.9707317073170731, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9525423728813559, "success_rate.epoch.env.logic": 0.909853249475891, "success_rate.epoch.env.math": 0.9749861033907726, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8674456083803385, "success_rate.epoch.env.webshop": 0.96875, "success_rate.epoch.env_macro_mean": 0.871494721123121, "success_rate.epoch.global": 0.9144788555258607, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974226804123711, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.9974293059125964, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002570694087403599 }, { "epoch": 2.68001704303366, "grad_norm": 134.60927404078097, "learning_rate": 3.4037773474109964e-07, "loss": 0.2746, "step": 12580, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9527027027027027, "success_rate.epoch.env.logic": 0.909853249475891, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8673100120627262, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8716548540605786, "success_rate.epoch.global": 0.914456129336054, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9666666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997229259024699, "tokens_p.mean_in_band": 0.5791015625, "tokens_rate.above_band": 0.9974731522425774, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002526847757422615 }, { "epoch": 2.6810822326374093, "grad_norm": 74.06099495629941, "learning_rate": 3.403479852308997e-07, "loss": 0.2647, "step": 12585, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9528619528619529, "success_rate.epoch.env.logic": 0.910135841170324, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8672282390693943, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8716875875933376, "success_rate.epoch.global": 0.9144334743770569, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968398876404494, "tokens_p.mean_in_band": 0.72412109375, "tokens_rate.above_band": 0.956989247311828, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043010752688172046 }, { "epoch": 2.682147422241159, "grad_norm": 128.30272537127172, "learning_rate": 3.4031825004294687e-07, "loss": 0.2658, "step": 12590, "success_rate.epoch.env.abd": 0.986013986013986, "success_rate.epoch.env.agentgym:alfworld": 0.8795811518324608, "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9530201342281879, "success_rate.epoch.env.logic": 0.9102296450939458, "success_rate.epoch.env.math": 0.9750415973377704, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.867440929114938, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8717336123818078, "success_rate.epoch.global": 0.9145539906103286, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992045454545454, "tokens_p.mean_in_band": 0.7760416666666666, "tokens_rate.above_band": 0.9918845807033363, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008115419296663661 }, { "epoch": 2.6832126118449082, "grad_norm": 123.3337714347934, "learning_rate": 3.402885291989119e-07, "loss": 0.2349, "step": 12595, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.8802083333333334, "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9530201342281879, "success_rate.epoch.env.logic": 0.9102296450939458, "success_rate.epoch.env.math": 0.9750968456004427, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8672530987604958, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8717830061240943, "success_rate.epoch.global": 0.91453125, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964812332439679, "tokens_p.mean_in_band": 0.729736328125, "tokens_rate.above_band": 0.979002624671916, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02099737532808399 }, { "epoch": 2.684277801448658, "grad_norm": 873.5598063836308, "learning_rate": 3.402588227204553e-07, "loss": 0.1938, "step": 12600, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.8808290155440415, "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9094693028095734, "success_rate.epoch.env.math": 0.9751106194690266, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8674650698602795, "success_rate.epoch.env.webshop": 0.9696969696969697, "success_rate.epoch.env_macro_mean": 0.8718051159596943, "success_rate.epoch.global": 0.9145085803432137, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9999003984063745, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9866352201257862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013364779874213837 }, { "epoch": 2.6853429910524076, "grad_norm": 243.5140877420023, "learning_rate": 3.402291306292268e-07, "loss": 0.3984, "step": 12605, "success_rate.epoch.env.abd": 0.9860627177700348, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.970873786407767, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9095634095634095, "success_rate.epoch.env.math": 0.9751518498067366, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.867277799920287, "success_rate.epoch.env.webshop": 0.9705882352941176, "success_rate.epoch.env_macro_mean": 0.8719372628845791, "success_rate.epoch.global": 0.9144859813084112, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973341232227488, "tokens_p.mean_in_band": 0.5738636363636364, "tokens_rate.above_band": 0.9829192546583851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017080745341614908 }, { "epoch": 2.686408180656157, "grad_norm": 115.42695363407147, "learning_rate": 3.4019945294686585e-07, "loss": 0.1744, "step": 12610, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.966183574879227, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9099378881987578, "success_rate.epoch.env.math": 0.9751792608935466, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8673835125448028, "success_rate.epoch.env.webshop": 0.9705882352941176, "success_rate.epoch.env_macro_mean": 0.8715614250808709, "success_rate.epoch.global": 0.9144634525660964, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984461325966851, "tokens_p.mean_in_band": 0.1328125, "tokens_rate.above_band": 0.9986206896551724, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001379310344827586 }, { "epoch": 2.687473370259906, "grad_norm": 68.83061612542053, "learning_rate": 3.401697896950012e-07, "loss": 0.2169, "step": 12615, "success_rate.epoch.env.abd": 0.9861111111111112, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9663461538461539, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9531772575250836, "success_rate.epoch.env.logic": 0.9099378881987578, "success_rate.epoch.env.math": 0.9752066115702479, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.867699642431466, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8716838246867568, "success_rate.epoch.global": 0.9145962732919255, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983789625360231, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.994269340974212, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0057306590257879654 }, { "epoch": 2.6885385598636558, "grad_norm": 42.58555076893122, "learning_rate": 3.401401408952514e-07, "loss": 0.1896, "step": 12620, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9665071770334929, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9533333333333334, "success_rate.epoch.env.logic": 0.9099378881987578, "success_rate.epoch.env.math": 0.9752339020363237, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8679619349722443, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8717433465450753, "success_rate.epoch.global": 0.9147286821705426, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992732558139535, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9950413223140496, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0049586776859504135 }, { "epoch": 2.6896037494674054, "grad_norm": 128.33269913118167, "learning_rate": 3.401105065692242e-07, "loss": 0.3188, "step": 12625, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9665071770334929, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9533333333333334, "success_rate.epoch.env.logic": 0.9101239669421488, "success_rate.epoch.env.math": 0.9752883031301482, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8681710213776722, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8717842162034976, "success_rate.epoch.global": 0.9148606811145511, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9884510869565217, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.9387755102040817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061224489795918366 }, { "epoch": 2.6906689390711547, "grad_norm": 28.585383766342733, "learning_rate": 3.400808867385169e-07, "loss": 0.1609, "step": 12630, "success_rate.epoch.env.abd": 0.986159169550173, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9665071770334929, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.9722222222222222, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9103092783505154, "success_rate.epoch.env.math": 0.975328947368421, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8683794466403162, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8718377998097678, "success_rate.epoch.global": 0.9149922720247295, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9938322368421053, "tokens_p.mean_in_band": 0.7607421875, "tokens_rate.above_band": 0.9743589743589743, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02564102564102564 }, { "epoch": 2.691734128674904, "grad_norm": 227.12093536884058, "learning_rate": 3.400512814247162e-07, "loss": 0.1493, "step": 12635, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.9666666666666667, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9534883720930233, "success_rate.epoch.env.logic": 0.9104938271604939, "success_rate.epoch.env.math": 0.975355969331873, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8685353335965259, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8719582930020512, "success_rate.epoch.global": 0.9151234567901234, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975458115183246, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.6927993182786536, "grad_norm": 65.53678140940933, "learning_rate": 3.400216906493982e-07, "loss": 0.2631, "step": 12640, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8814432989690721, "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9536423841059603, "success_rate.epoch.env.logic": 0.9106776180698152, "success_rate.epoch.env.math": 0.9753963914707491, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8682965299684543, "success_rate.epoch.env.webshop": 0.9714285714285714, "success_rate.epoch.env_macro_mean": 0.8719853293035201, "success_rate.epoch.global": 0.9151001540832049, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991349480968859, "tokens_p.mean_in_band": 0.6575520833333334, "tokens_rate.above_band": 0.9796610169491525, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020338983050847456 }, { "epoch": 2.6938645078824033, "grad_norm": 85.14168393718177, "learning_rate": 3.399921144341285e-07, "loss": 0.4298, "step": 12645, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.882051282051282, "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9537953795379538, "success_rate.epoch.env.logic": 0.90992835209826, "success_rate.epoch.env.math": 0.9748771163298744, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8684003152088259, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8720207723431379, "success_rate.epoch.global": 0.914923076923077, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.861111111111111, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9985029940119761, "tokens_p.mean_in_band": 0.68125, "tokens_rate.above_band": 0.9852507374631269, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014749262536873156 }, { "epoch": 2.6949296974861525, "grad_norm": 162.84839387484718, "learning_rate": 3.3996255280046204e-07, "loss": 0.3466, "step": 12650, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.882051282051282, "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.954248366013072, "success_rate.epoch.env.logic": 0.9101123595505618, "success_rate.epoch.env.math": 0.9749045280960175, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8681621408894136, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8720595206498789, "success_rate.epoch.global": 0.914900153609831, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999058734939759, "tokens_p.mean_in_band": 0.19375, "tokens_rate.above_band": 0.9900596421471173, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009940357852882704 }, { "epoch": 2.6959948870899018, "grad_norm": 165.0099130703682, "learning_rate": 3.399330057699431e-07, "loss": 0.1159, "step": 12655, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.954248366013072, "success_rate.epoch.env.logic": 0.9103869653767821, "success_rate.epoch.env.math": 0.974959172563963, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8682658277624853, "success_rate.epoch.env.webshop": 0.9722222222222222, "success_rate.epoch.env_macro_mean": 0.8721535857717377, "success_rate.epoch.global": 0.9150306748466258, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977678571428571, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9967637540453075, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003236245954692557 }, { "epoch": 2.6970600766936514, "grad_norm": 98.05996758161302, "learning_rate": 3.399034733641053e-07, "loss": 0.2274, "step": 12660, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, "success_rate.epoch.env.agentgym:sciworld": 0.966824644549763, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.954248366013072, "success_rate.epoch.env.logic": 0.9103869653767821, "success_rate.epoch.env.math": 0.9750271444082519, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8680800942285041, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8722111302318339, "success_rate.epoch.global": 0.9150076569678407, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988541666666667, "tokens_p.mean_in_band": 0.625, "tokens_rate.above_band": 0.974025974025974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025974025974025976 }, { "epoch": 2.698125266297401, "grad_norm": 330.8750113513593, "learning_rate": 3.398739556044717e-07, "loss": 0.2343, "step": 12665, "success_rate.epoch.env.abd": 0.9862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9543973941368078, "success_rate.epoch.env.logic": 0.9098277608915907, "success_rate.epoch.env.math": 0.9750406945198047, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8681318681318682, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8722080987171467, "success_rate.epoch.global": 0.9149847094801223, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.8, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990974729241877, "tokens_p.mean_in_band": 0.7963169642857143, "tokens_rate.above_band": 0.9875222816399287, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012477718360071301 }, { "epoch": 2.6991904559011504, "grad_norm": 51.17089504857138, "learning_rate": 3.398444525125547e-07, "loss": 0.1257, "step": 12670, "success_rate.epoch.env.abd": 0.9862542955326461, "success_rate.epoch.env.agentgym:alfworld": 0.8826530612244898, "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.910010111223458, "success_rate.epoch.env.math": 0.9745533297238765, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8682352941176471, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8722075415272638, "success_rate.epoch.global": 0.9149618320610687, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965415019762845, "tokens_p.mean_below_band": 7.729977369308472e-08, "tokens_p.mean_in_band": 0.7769097222222222, "tokens_rate.above_band": 0.9806201550387597, "tokens_rate.below_band": 0.001937984496124031, "tokens_rate.in_band": 0.01744186046511628 }, { "epoch": 2.7002556455048996, "grad_norm": 430.729366643276, "learning_rate": 3.398149641098558e-07, "loss": 0.5388, "step": 12675, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, "success_rate.epoch.env.agentgym:textcraft": 0.9767441860465116, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9101917255297679, "success_rate.epoch.env.math": 0.9745945945945946, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8683385579937304, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8718900901587971, "success_rate.epoch.global": 0.9149390243902439, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994070208728653, "tokens_p.mean_in_band": 0.710546875, "tokens_rate.above_band": 0.9906015037593985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009398496240601503 }, { "epoch": 2.7013208351086493, "grad_norm": 6.7423208327181605, "learning_rate": 3.39785490417866e-07, "loss": 0.1175, "step": 12680, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.9104627766599598, "success_rate.epoch.env.math": 0.974622030237581, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8680501174628035, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8716724570814169, "success_rate.epoch.global": 0.9147640791476408, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939060642092746, "tokens_p.mean_in_band": 0.7752700617283951, "tokens_rate.above_band": 0.9121475054229935, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0878524945770065 }, { "epoch": 2.702386024712399, "grad_norm": 151.1336897590604, "learning_rate": 3.397560314580656e-07, "loss": 0.227, "step": 12685, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.9105527638190954, "success_rate.epoch.env.math": 0.9741379310344828, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8683079327862446, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8716600664704605, "success_rate.epoch.global": 0.914741641337386, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993844696969697, "tokens_p.mean_in_band": 0.689453125, "tokens_rate.above_band": 0.9705882352941176, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029411764705882353 }, { "epoch": 2.703451214316148, "grad_norm": 41.35779551554649, "learning_rate": 3.3972658725192395e-07, "loss": 0.2901, "step": 12690, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8787878787878788, "success_rate.epoch.env.agentgym:sciworld": 0.9671361502347418, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.9106425702811245, "success_rate.epoch.env.math": 0.9742074153680816, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8685134607881388, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8716932318156899, "success_rate.epoch.global": 0.9148710166919575, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9885869565217391, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.9745762711864406, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025423728813559324 }, { "epoch": 2.704516403919898, "grad_norm": 253.11477489110263, "learning_rate": 3.396971578208998e-07, "loss": 0.1446, "step": 12695, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9516129032258065, "success_rate.epoch.env.logic": 0.9107321965897693, "success_rate.epoch.env.math": 0.9742212674543501, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8687694704049844, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8718500662352632, "success_rate.epoch.global": 0.915, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9982002617801047, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.705581593523647, "grad_norm": 75.85965181853233, "learning_rate": 3.3966774318644115e-07, "loss": 0.2657, "step": 12700, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9672897196261683, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9519230769230769, "success_rate.epoch.env.logic": 0.9099099099099099, "success_rate.epoch.env.math": 0.9742489270386266, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8689735614307932, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8718245787468537, "success_rate.epoch.global": 0.9149773071104387, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985342401500938, "tokens_p.mean_in_band": 0.6216517857142857, "tokens_rate.above_band": 0.987037037037037, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012962962962962963 }, { "epoch": 2.706646783127397, "grad_norm": 64.85725402361429, "learning_rate": 3.3963834336998517e-07, "loss": 0.168, "step": 12705, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9674418604651163, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9519230769230769, "success_rate.epoch.env.logic": 0.91, "success_rate.epoch.env.math": 0.974304068522484, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8691770186335404, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8718701087119122, "success_rate.epoch.global": 0.9151057401812689, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983836206896551, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.997134670487106, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0028653295128939827 }, { "epoch": 2.707711972731146, "grad_norm": 102.2612461823331, "learning_rate": 3.3960895839295816e-07, "loss": 0.2986, "step": 12710, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9674418604651163, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.952076677316294, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9743178170144462, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8690937257939582, "success_rate.epoch.env.webshop": 0.972972972972973, "success_rate.epoch.env_macro_mean": 0.8717993558155837, "success_rate.epoch.global": 0.9149321266968325, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979895931882686, "tokens_p.mean_in_band": 0.5779854910714286, "tokens_rate.above_band": 0.9741935483870968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025806451612903226 }, { "epoch": 2.7087771623348957, "grad_norm": 76.78319581436322, "learning_rate": 3.395795882767758e-07, "loss": 0.2994, "step": 12715, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.952076677316294, "success_rate.epoch.env.logic": 0.9090909090909091, "success_rate.epoch.env.math": 0.9743863393810032, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8692456479690522, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8718977571085353, "success_rate.epoch.global": 0.9150602409638554, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966517857142857, "tokens_p.mean_in_band": 0.8190104166666666, "tokens_rate.above_band": 0.9837837837837838, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016216216216216217 }, { "epoch": 2.709842351938645, "grad_norm": 118.15155090805835, "learning_rate": 3.395502330428427e-07, "loss": 0.1937, "step": 12720, "success_rate.epoch.env.abd": 0.9863481228668942, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9522292993630573, "success_rate.epoch.env.logic": 0.9093625498007968, "success_rate.epoch.env.math": 0.9744408945686901, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8693467336683417, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8719504756215923, "success_rate.epoch.global": 0.915187969924812, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981904231625836, "tokens_p.mean_in_band": 0.80234375, "tokens_rate.above_band": 0.9889867841409692, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011013215859030838 }, { "epoch": 2.7109075415423947, "grad_norm": 139.73339175908865, "learning_rate": 3.395208927125529e-07, "loss": 0.1715, "step": 12725, "success_rate.epoch.env.abd": 0.9864406779661017, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.9675925925925926, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9522292993630573, "success_rate.epoch.env.logic": 0.9093625498007968, "success_rate.epoch.env.math": 0.9744544970729111, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8696993060909792, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8719921783512348, "success_rate.epoch.global": 0.9153153153153153, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9954819277108434, "tokens_p.mean_in_band": 0.8149857954545454, "tokens_rate.above_band": 0.9378531073446328, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.062146892655367235 }, { "epoch": 2.711972731146144, "grad_norm": 83.42112473422877, "learning_rate": 3.3949156730728926e-07, "loss": 0.2264, "step": 12730, "success_rate.epoch.env.abd": 0.9865319865319865, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9772727272727273, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9093625498007968, "success_rate.epoch.env.math": 0.9744680851063829, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8695652173913043, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8720168877884035, "success_rate.epoch.global": 0.9152923538230885, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994783464566929, "tokens_p.mean_in_band": 0.47098214285714285, "tokens_rate.above_band": 0.9978001257071024, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0021998742928975488 }, { "epoch": 2.7130379207498936, "grad_norm": 267.4328429181617, "learning_rate": 3.3946225684842396e-07, "loss": 0.2145, "step": 12735, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.9087301587301587, "success_rate.epoch.env.math": 0.9745087626128518, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8696153846153846, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8720176785921333, "success_rate.epoch.global": 0.9152694610778443, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978309768637532, "tokens_p.mean_in_band": 0.4552083333333333, "tokens_rate.above_band": 0.9810844892812106, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018915510718789406 }, { "epoch": 2.714103110353643, "grad_norm": 105.2856814348994, "learning_rate": 3.394329613573183e-07, "loss": 0.2259, "step": 12740, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9525316455696202, "success_rate.epoch.env.logic": 0.908820614469772, "success_rate.epoch.env.math": 0.9745627980922098, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.869431643625192, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8720278098119011, "success_rate.epoch.global": 0.9152466367713005, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965277777777778, "tokens_p.mean_in_band": 0.6986607142857143, "tokens_rate.above_band": 0.9909326424870466, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009067357512953367 }, { "epoch": 2.7151682999573925, "grad_norm": 133.42069168673714, "learning_rate": 3.3940368085532264e-07, "loss": 0.287, "step": 12745, "success_rate.epoch.env.abd": 0.9865771812080537, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9525316455696202, "success_rate.epoch.env.logic": 0.907920792079208, "success_rate.epoch.env.math": 0.9746031746031746, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8693486590038314, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8719421343118139, "success_rate.epoch.global": 0.9150746268656716, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992264851485149, "tokens_p.mean_in_band": 0.6047894021739131, "tokens_rate.above_band": 0.9634340222575517, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03656597774244833 }, { "epoch": 2.7162334895611417, "grad_norm": 13.830513736295519, "learning_rate": 3.393744153637764e-07, "loss": 0.1776, "step": 12750, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.88, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9525316455696202, "success_rate.epoch.env.logic": 0.9071146245059288, "success_rate.epoch.env.math": 0.9746434231378764, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8694986605434366, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8718942768535995, "success_rate.epoch.global": 0.9150521609538003, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980376766091051, "tokens_p.mean_in_band": 0.47265625, "tokens_rate.above_band": 0.9754977029096478, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02450229709035222 }, { "epoch": 2.7172986791648914, "grad_norm": 148.4499199044527, "learning_rate": 3.393451649040079e-07, "loss": 0.2425, "step": 12755, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8756218905472637, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9525316455696202, "success_rate.epoch.env.logic": 0.9072063178677197, "success_rate.epoch.env.math": 0.9747235387045814, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8695984703632887, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8715209595168368, "success_rate.epoch.global": 0.9150297619047619, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978153495440729, "tokens_p.mean_below_band": 4.94765117764473e-09, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.987987987987988, "tokens_rate.below_band": 0.003003003003003003, "tokens_rate.in_band": 0.009009009009009009 }, { "epoch": 2.7183638687686407, "grad_norm": 122.44707007883592, "learning_rate": 3.3931592949733487e-07, "loss": 0.2001, "step": 12760, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9525316455696202, "success_rate.epoch.env.logic": 0.9072978303747534, "success_rate.epoch.env.math": 0.9747899159663865, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8697478991596639, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8716048733150484, "success_rate.epoch.global": 0.9151560178306092, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978582554517134, "tokens_p.mean_in_band": 0.7252604166666666, "tokens_rate.above_band": 0.963963963963964, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036036036036036036 }, { "epoch": 2.7194290583723904, "grad_norm": 110.02481614687227, "learning_rate": 3.3928670916506373e-07, "loss": 0.2534, "step": 12765, "success_rate.epoch.env.abd": 0.9867549668874173, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9526813880126183, "success_rate.epoch.env.logic": 0.9075712881022615, "success_rate.epoch.env.math": 0.974816369359916, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8698473282442748, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8716628172122665, "success_rate.epoch.global": 0.9152818991097923, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993361928104575, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.99836867862969, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0016313213703099511 }, { "epoch": 2.7204942479761396, "grad_norm": 93.5136467802139, "learning_rate": 3.392575039284902e-07, "loss": 0.2452, "step": 12770, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9526813880126183, "success_rate.epoch.env.logic": 0.9076620825147348, "success_rate.epoch.env.math": 0.9748427672955975, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8697638994668697, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8713698305321131, "success_rate.epoch.global": 0.9151111111111111, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9911830357142857, "tokens_p.mean_in_band": 0.245605762012012, "tokens_rate.above_band": 0.14395886889460155, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.8560411311053985 }, { "epoch": 2.7215594375798893, "grad_norm": 29.63435732336514, "learning_rate": 3.3922831380889865e-07, "loss": 0.1235, "step": 12775, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9526813880126183, "success_rate.epoch.env.logic": 0.9069539666993144, "success_rate.epoch.env.math": 0.974869109947644, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8700114025085519, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8713303514301413, "success_rate.epoch.global": 0.9150887573964497, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996309963099631, "tokens_p.mean_in_band": 0.6375, "tokens_rate.above_band": 0.9475524475524476, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05244755244755245 }, { "epoch": 2.722624627183639, "grad_norm": 584.5256637068097, "learning_rate": 3.3919913882756275e-07, "loss": 0.111, "step": 12780, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9529780564263323, "success_rate.epoch.env.logic": 0.9070450097847358, "success_rate.epoch.env.math": 0.9748953974895398, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.870257966616085, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8713904026254653, "success_rate.epoch.global": 0.9152141802067947, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965923172242875, "tokens_p.mean_in_band": 0.6982421875, "tokens_rate.above_band": 0.9901840490797545, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0098159509202454 }, { "epoch": 2.723689816787388, "grad_norm": 244.25106779476602, "learning_rate": 3.39169979005745e-07, "loss": 0.4608, "step": 12785, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8762376237623762, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9529780564263323, "success_rate.epoch.env.logic": 0.9070450097847358, "success_rate.epoch.env.math": 0.974934725848564, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8705526116578349, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8714342159139911, "success_rate.epoch.global": 0.9153392330383481, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9938271604938271, "tokens_p.mean_in_band": 0.7083333333333334, "tokens_rate.above_band": 0.9642857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03571428571428571 }, { "epoch": 2.7247550063911374, "grad_norm": 112.75010656510469, "learning_rate": 3.391408343646969e-07, "loss": 0.2757, "step": 12790, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8768472906403941, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9529780564263323, "success_rate.epoch.env.logic": 0.9070450097847358, "success_rate.epoch.env.math": 0.9749739311783108, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8704682779456193, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8714855375954046, "success_rate.epoch.global": 0.9153166421207658, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970518867924528, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9706959706959707, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029304029304029304 }, { "epoch": 2.725820195994887, "grad_norm": 87.0786076861541, "learning_rate": 3.3911170492565874e-07, "loss": 0.2456, "step": 12795, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9070450097847358, "success_rate.epoch.env.math": 0.9750260145681582, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8706148623161071, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8716261831573514, "success_rate.epoch.global": 0.9154411764705882, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989804241435563, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9967479674796748, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0032520325203252032 }, { "epoch": 2.726885385598637, "grad_norm": 771.4129792867897, "learning_rate": 3.3908259070985995e-07, "loss": 0.3118, "step": 12800, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.9070450097847358, "success_rate.epoch.env.math": 0.9750649350649351, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8709556057185854, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8716606980572835, "success_rate.epoch.global": 0.9155653450807636, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923469387755102, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9865771812080537, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013422818791946308 }, { "epoch": 2.727950575202386, "grad_norm": 49.76267568919681, "learning_rate": 3.390534917385188e-07, "loss": 0.2392, "step": 12805, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8780487804878049, "success_rate.epoch.env.agentgym:sciworld": 0.9678899082568807, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9072265625, "success_rate.epoch.env.math": 0.9750908147379346, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8711978971085242, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8717148573132459, "success_rate.epoch.global": 0.9156891495601173, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981060606060606, "tokens_p.mean_in_band": 0.8095703125, "tokens_rate.above_band": 0.9867109634551495, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013289036544850499 }, { "epoch": 2.7290157648061353, "grad_norm": 155.5212812410368, "learning_rate": 3.3902440803284225e-07, "loss": 0.3043, "step": 12810, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.972972972972973, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9072265625, "success_rate.epoch.env.math": 0.9751295336787564, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8714392803598201, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8718074682065938, "success_rate.epoch.global": 0.9158125915080527, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9963010204081633, "tokens_p.mean_in_band": 0.8323863636363636, "tokens_rate.above_band": 0.9780439121756487, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021956087824351298 }, { "epoch": 2.730080954409885, "grad_norm": 0.0, "learning_rate": 3.3899533961402645e-07, "loss": 0.2091, "step": 12815, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9074074074074074, "success_rate.epoch.env.math": 0.975168132436627, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8716317365269461, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8719095716052979, "success_rate.epoch.global": 0.9159356725146199, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9944029850746269, "tokens_p.mean_in_band": 0.7353515625, "tokens_rate.above_band": 0.9710144927536232, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028985507246376812 }, { "epoch": 2.7311461440136346, "grad_norm": 21.88860198382124, "learning_rate": 3.389662865032562e-07, "loss": 0.1709, "step": 12820, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9680365296803652, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9074074074074074, "success_rate.epoch.env.math": 0.9752194114610222, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8719193427931292, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8719403793589868, "success_rate.epoch.global": 0.916058394160584, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9941964285714285, "tokens_p.mean_in_band": 0.8153409090909091, "tokens_rate.above_band": 0.9271523178807947, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0728476821192053 }, { "epoch": 2.732211333617384, "grad_norm": 681.6245002894009, "learning_rate": 3.3893724872170517e-07, "loss": 0.3349, "step": 12825, "success_rate.epoch.env.abd": 0.9834983498349835, "success_rate.epoch.env.agentgym:alfworld": 0.8786407766990292, "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9075875486381323, "success_rate.epoch.env.math": 0.9752704791344667, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8716896680343156, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8719537268723332, "success_rate.epoch.global": 0.9160349854227405, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967783505154639, "tokens_p.mean_in_band": 0.5879720052083334, "tokens_rate.above_band": 0.9938524590163934, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006147540983606557 }, { "epoch": 2.733276523221133, "grad_norm": 77.80873807809769, "learning_rate": 3.389082262905359e-07, "loss": 0.3259, "step": 12830, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8792270531400966, "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, "success_rate.epoch.env.agentgym:textcraft": 0.9777777777777777, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9067055393586005, "success_rate.epoch.env.math": 0.975295934122491, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8711839166046166, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8718881134599535, "success_rate.epoch.global": 0.9157205240174673, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.997957768187423, "tokens_p.mean_in_band": 0.5323893229166666, "tokens_rate.above_band": 0.9574970484061394, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04250295159386069 }, { "epoch": 2.734341712824883, "grad_norm": 61.20504537149271, "learning_rate": 3.3887921923089975e-07, "loss": 0.3251, "step": 12835, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.9532710280373832, "success_rate.epoch.env.logic": 0.9067961165048544, "success_rate.epoch.env.math": 0.9753213367609255, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.8713754646840148, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8720650536082306, "success_rate.epoch.global": 0.9158430232558139, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996016288951841, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.995768688293371, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004231311706629055 }, { "epoch": 2.7354069024286325, "grad_norm": 403.59498306493344, "learning_rate": 3.388502275639368e-07, "loss": 0.4661, "step": 12840, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.953416149068323, "success_rate.epoch.env.logic": 0.9067961165048544, "success_rate.epoch.env.math": 0.9753846153846154, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8711474192350538, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8717718923355524, "success_rate.epoch.global": 0.9156748911465893, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981534090909091, "tokens_p.mean_in_band": 0.67578125, "tokens_rate.above_band": 0.9606986899563319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039301310043668124 }, { "epoch": 2.7364720920323817, "grad_norm": 94.65129535578392, "learning_rate": 3.388212513107761e-07, "loss": 0.2215, "step": 12845, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8803827751196173, "success_rate.epoch.env.agentgym:sciworld": 0.9681818181818181, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9736842105263158, "success_rate.epoch.env.ded": 0.953416149068323, "success_rate.epoch.env.logic": 0.906886517943744, "success_rate.epoch.env.math": 0.9754350051177073, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8713862120088954, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8718063999669909, "success_rate.epoch.global": 0.9157971014492754, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919554455445545, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9099099099099099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09009009009009009 }, { "epoch": 2.737537281636131, "grad_norm": 19.6105710498484, "learning_rate": 3.3879229049253523e-07, "loss": 0.1336, "step": 12850, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.9684684684684685, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.953416149068323, "success_rate.epoch.env.logic": 0.9069767441860465, "success_rate.epoch.env.math": 0.9754475703324809, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8715766099185789, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8719722371777938, "success_rate.epoch.global": 0.9159189580318379, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973834913112164, "tokens_p.mean_in_band": 0.61328125, "tokens_rate.above_band": 0.9968503937007874, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0031496062992125984 }, { "epoch": 2.7386024712398807, "grad_norm": 139.745001512881, "learning_rate": 3.3876334513032063e-07, "loss": 0.2752, "step": 12855, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.9684684684684685, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.953416149068323, "success_rate.epoch.env.logic": 0.9072463768115943, "success_rate.epoch.env.math": 0.9755102040816327, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8716715976331361, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8720110784586353, "success_rate.epoch.global": 0.9160404624277456, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965277777777778, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9926470588235294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007352941176470588 }, { "epoch": 2.7396676608436303, "grad_norm": 89.58953375412416, "learning_rate": 3.387344152452275e-07, "loss": 0.2905, "step": 12860, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8809523809523809, "success_rate.epoch.env.agentgym:sciworld": 0.9684684684684685, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9535603715170279, "success_rate.epoch.env.logic": 0.9072463768115943, "success_rate.epoch.env.math": 0.9750254841997962, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8712652157875322, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8719431803423867, "success_rate.epoch.global": 0.9157287157287157, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.7380952380952381, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981203007518797, "tokens_p.mean_in_band": 0.5301339285714286, "tokens_rate.above_band": 0.9661016949152542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03389830508474576 }, { "epoch": 2.7407328504473796, "grad_norm": 194.36245017305367, "learning_rate": 3.387055008583397e-07, "loss": 0.2477, "step": 12865, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9535603715170279, "success_rate.epoch.env.logic": 0.9072463768115943, "success_rate.epoch.env.math": 0.9750382068262863, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.8715495031284505, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8720470665445944, "success_rate.epoch.global": 0.915850144092219, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985974754558204, "tokens_p.mean_in_band": 0.7728794642857143, "tokens_rate.above_band": 0.9902777777777778, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009722222222222222 }, { "epoch": 2.7417980400511293, "grad_norm": 123.90996189365873, "learning_rate": 3.386766019907299e-07, "loss": 0.1662, "step": 12870, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9535603715170279, "success_rate.epoch.env.logic": 0.9074252651880425, "success_rate.epoch.env.math": 0.9750889679715302, "success_rate.epoch.env.sat": 0.12195121951219512, "success_rate.epoch.env.science": 0.8716911764705882, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8718036613060511, "success_rate.epoch.global": 0.9158273381294963, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996050552922591, "tokens_p.mean_in_band": 0.6830357142857143, "tokens_rate.above_band": 0.9783616692426584, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021638330757341576 }, { "epoch": 2.7428632296548785, "grad_norm": 291.7152713621318, "learning_rate": 3.3864771866345927e-07, "loss": 0.3322, "step": 12875, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9535603715170279, "success_rate.epoch.env.logic": 0.9074252651880425, "success_rate.epoch.env.math": 0.9751269035532995, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8719735876742479, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8737333222269945, "success_rate.epoch.global": 0.915948275862069, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923427152317881, "tokens_p.mean_in_band": 0.818359375, "tokens_rate.above_band": 0.9617834394904459, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03821656050955414 }, { "epoch": 2.743928419258628, "grad_norm": 69.54202925737356, "learning_rate": 3.3861885089757785e-07, "loss": 0.2093, "step": 12880, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8815165876777251, "success_rate.epoch.env.agentgym:sciworld": 0.9690265486725663, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9535603715170279, "success_rate.epoch.env.logic": 0.9075144508670521, "success_rate.epoch.env.math": 0.9751647237709072, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8717948717948718, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.873753762107886, "success_rate.epoch.global": 0.9159253945480631, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982755016722408, "tokens_p.mean_in_band": 0.689453125, "tokens_rate.above_band": 0.9867986798679867, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013201320132013201 }, { "epoch": 2.7449936088623774, "grad_norm": 289.29992965259237, "learning_rate": 3.385899987141243e-07, "loss": 0.2662, "step": 12885, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9690265486725663, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9535603715170279, "success_rate.epoch.env.logic": 0.9076034648700674, "success_rate.epoch.env.math": 0.9746963562753036, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8720292504570384, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8737913902160649, "success_rate.epoch.global": 0.9159025787965616, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966814159292036, "tokens_p.mean_in_band": 0.575, "tokens_rate.above_band": 0.9890590809628009, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010940919037199124 }, { "epoch": 2.746058798466127, "grad_norm": 14.151230101867, "learning_rate": 3.385611621341258e-07, "loss": 0.0535, "step": 12890, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9539877300613497, "success_rate.epoch.env.logic": 0.9077809798270894, "success_rate.epoch.env.math": 0.9747091552857865, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.8721694667640614, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8738726934652464, "success_rate.epoch.global": 0.9160228898426324, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9996811224489796, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.7471239880698763, "grad_norm": 26.00191983956801, "learning_rate": 3.3853234117859824e-07, "loss": 0.2004, "step": 12895, "success_rate.epoch.env.abd": 0.9835526315789473, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, "success_rate.epoch.env.agentgym:textcraft": 0.9782608695652174, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9541284403669725, "success_rate.epoch.env.logic": 0.9069097888675623, "success_rate.epoch.env.math": 0.9747729566094854, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.871897810218978, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8755995315508817, "success_rate.epoch.global": 0.9158571428571428, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989251592356688, "tokens_p.mean_in_band": 0.69296875, "tokens_rate.above_band": 0.9751552795031055, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024844720496894408 }, { "epoch": 2.748189177673626, "grad_norm": 60.21817987872554, "learning_rate": 3.385035358685462e-07, "loss": 0.1821, "step": 12900, "success_rate.epoch.env.abd": 0.9836601307189542, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9541284403669725, "success_rate.epoch.env.logic": 0.9069990412272292, "success_rate.epoch.env.math": 0.9747983870967742, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8720845481049563, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8756787546929765, "success_rate.epoch.global": 0.9159771754636234, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957865168539326, "tokens_p.mean_in_band": 0.8645833333333334, "tokens_rate.above_band": 0.9834254143646409, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016574585635359115 }, { "epoch": 2.7492543672773753, "grad_norm": 109.7369405967248, "learning_rate": 3.384747462249627e-07, "loss": 0.2379, "step": 12905, "success_rate.epoch.env.abd": 0.9837133550488599, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9541284403669725, "success_rate.epoch.env.logic": 0.907177033492823, "success_rate.epoch.env.math": 0.9748237663645518, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8723172062568206, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8757232323307167, "success_rate.epoch.global": 0.9160968660968661, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9940476190476191, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9545454545454546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045454545454545456 }, { "epoch": 2.750319556881125, "grad_norm": 111.7120308445304, "learning_rate": 3.384459722688295e-07, "loss": 0.3157, "step": 12910, "success_rate.epoch.env.abd": 0.9837662337662337, "success_rate.epoch.env.agentgym:alfworld": 0.8820754716981132, "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9541284403669725, "success_rate.epoch.env.logic": 0.907177033492823, "success_rate.epoch.env.math": 0.9748743718592965, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8725490196078431, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8757537139273659, "success_rate.epoch.global": 0.9162162162162162, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995345744680851, "tokens_p.mean_in_band": 0.5693359375, "tokens_rate.above_band": 0.9591836734693877, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04081632653061224 }, { "epoch": 2.751384746484874, "grad_norm": 106.38458675061695, "learning_rate": 3.384172140211168e-07, "loss": 0.1548, "step": 12915, "success_rate.epoch.env.abd": 0.9838187702265372, "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, "success_rate.epoch.env.agentgym:sciworld": 0.9691629955947136, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9541284403669725, "success_rate.epoch.env.logic": 0.9074427480916031, "success_rate.epoch.env.math": 0.9748995983935743, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8723249909321726, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8753881002473701, "success_rate.epoch.global": 0.9160511363636363, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9959546925566343, "tokens_p.mean_in_band": 0.6899038461538461, "tokens_rate.above_band": 0.9596273291925466, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040372670807453416 }, { "epoch": 2.752449936088624, "grad_norm": 963.3624042076193, "learning_rate": 3.383884715027834e-07, "loss": 0.1965, "step": 12920, "success_rate.epoch.env.abd": 0.9838187702265372, "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9787234042553191, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9541284403669725, "success_rate.epoch.env.logic": 0.9076190476190477, "success_rate.epoch.env.math": 0.974937343358396, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8721477725461789, "success_rate.epoch.env.webshop": 0.9736842105263158, "success_rate.epoch.env_macro_mean": 0.8754037435315151, "success_rate.epoch.global": 0.9160283687943263, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962121212121212, "tokens_p.mean_in_band": 0.5712890625, "tokens_rate.above_band": 0.9763313609467456, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023668639053254437 }, { "epoch": 2.753515125692373, "grad_norm": 41.79778241882147, "learning_rate": 3.383597447347767e-07, "loss": 0.1631, "step": 12925, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8779342723004695, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9542682926829268, "success_rate.epoch.env.logic": 0.9078822412155746, "success_rate.epoch.env.math": 0.9749498997995992, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8722403184943902, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.875556322809815, "success_rate.epoch.global": 0.9161473087818697, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987999231950845, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9977011494252873, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0022988505747126436 }, { "epoch": 2.754580315296123, "grad_norm": 53.378081761554114, "learning_rate": 3.3833103373803254e-07, "loss": 0.1884, "step": 12930, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8785046728971962, "success_rate.epoch.env.agentgym:sciworld": 0.9692982456140351, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9070208728652751, "success_rate.epoch.env.math": 0.974974974974975, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.872471098265896, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.8755657672955613, "success_rate.epoch.global": 0.9161244695898161, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977627020785219, "tokens_p.mean_in_band": 0.7125538793103449, "tokens_rate.above_band": 0.9675977653631285, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03240223463687151 }, { "epoch": 2.755645504899872, "grad_norm": 129.9964445162915, "learning_rate": 3.3830233853347524e-07, "loss": 0.3093, "step": 12935, "success_rate.epoch.env.abd": 0.9838709677419355, "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, "success_rate.epoch.env.agentgym:sciworld": 0.9694323144104804, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9071969696969697, "success_rate.epoch.env.math": 0.9750124937531235, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8726091663659329, "success_rate.epoch.env.webshop": 0.9743589743589743, "success_rate.epoch.env_macro_mean": 0.8756612988456512, "success_rate.epoch.global": 0.9162429378531074, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957842612419701, "tokens_p.mean_in_band": 0.74375, "tokens_rate.above_band": 0.989406779661017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01059322033898305 }, { "epoch": 2.7567106945036217, "grad_norm": 117.04270256510601, "learning_rate": 3.382736591420177e-07, "loss": 0.2535, "step": 12940, "success_rate.epoch.env.abd": 0.9839228295819936, "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9074598677998111, "success_rate.epoch.env.math": 0.9745254745254746, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8727469358327326, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8757285205550968, "success_rate.epoch.global": 0.9162200282087447, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998007741347905, "tokens_p.mean_in_band": 0.4586397058823529, "tokens_rate.above_band": 0.9699646643109541, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030035335689045935 }, { "epoch": 2.757775884107371, "grad_norm": 288.49917763886185, "learning_rate": 3.382449955845613e-07, "loss": 0.3495, "step": 12945, "success_rate.epoch.env.abd": 0.9839228295819936, "success_rate.epoch.env.agentgym:alfworld": 0.8790697674418605, "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9066918001885014, "success_rate.epoch.env.math": 0.9745508982035929, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8726618705035971, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8756532742585216, "success_rate.epoch.global": 0.916056338028169, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992827868852459, "tokens_p.mean_in_band": 0.5480587121212122, "tokens_rate.above_band": 0.9486780715396579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05132192846034215 }, { "epoch": 2.7588410737111206, "grad_norm": 73.63117908408128, "learning_rate": 3.382163478819957e-07, "loss": 0.1572, "step": 12950, "success_rate.epoch.env.abd": 0.9840255591054313, "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9066918001885014, "success_rate.epoch.env.math": 0.9745762711864406, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8728904847396768, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8757365996158064, "success_rate.epoch.global": 0.9161744022503516, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978380503144654, "tokens_p.mean_in_band": 0.8190104166666666, "tokens_rate.above_band": 0.9906542056074766, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009345794392523364 }, { "epoch": 2.7599062633148703, "grad_norm": 36.279593757060574, "learning_rate": 3.3818771605519924e-07, "loss": 0.1955, "step": 12955, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, "success_rate.epoch.env.agentgym:sciworld": 0.9695652173913043, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9067796610169492, "success_rate.epoch.env.math": 0.9746394828443561, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8730272596843616, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8757673924745916, "success_rate.epoch.global": 0.9162921348314607, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967648678414097, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9978021978021978, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002197802197802198 }, { "epoch": 2.7609714529186196, "grad_norm": 498.2493770578576, "learning_rate": 3.381591001250386e-07, "loss": 0.3686, "step": 12960, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9068673565380997, "success_rate.epoch.env.math": 0.9746772591857001, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8728965270318654, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8757788917032883, "success_rate.epoch.global": 0.9162692847124825, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961538461538462, "tokens_p.mean_in_band": 0.6569010416666666, "tokens_rate.above_band": 0.9558823529411765, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04411764705882353 }, { "epoch": 2.762036642522369, "grad_norm": 129.86885318783732, "learning_rate": 3.3813050011236886e-07, "loss": 0.1692, "step": 12965, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8796296296296297, "success_rate.epoch.env.agentgym:sciworld": 0.9698275862068966, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9061913696060038, "success_rate.epoch.env.math": 0.9747023809523809, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8730782981766178, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.875748121020494, "success_rate.epoch.global": 0.9162464985994397, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963959854014599, "tokens_p.mean_in_band": 0.7291666666666666, "tokens_rate.above_band": 0.9913169319826338, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008683068017366137 }, { "epoch": 2.7631018321261185, "grad_norm": 71.0039974069899, "learning_rate": 3.381019160380334e-07, "loss": 0.1114, "step": 12970, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.880184331797235, "success_rate.epoch.env.agentgym:sciworld": 0.9698275862068966, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9544072948328267, "success_rate.epoch.env.logic": 0.9062792877225867, "success_rate.epoch.env.math": 0.9747524752475247, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.8732595501606569, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8758275725262551, "success_rate.epoch.global": 0.9163636363636364, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981897865853658, "tokens_p.mean_in_band": 0.8203125, "tokens_rate.above_band": 0.9939393939393939, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006060606060606061 }, { "epoch": 2.764167021729868, "grad_norm": 80.35148605776824, "learning_rate": 3.3807334792286423e-07, "loss": 0.2656, "step": 12975, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.880184331797235, "success_rate.epoch.env.agentgym:sciworld": 0.9700854700854701, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9062792877225867, "success_rate.epoch.env.math": 0.974308300395257, "success_rate.epoch.env.sat": 0.16279069767441862, "success_rate.epoch.env.science": 0.87339514978602, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8758355241957365, "success_rate.epoch.global": 0.916340782122905, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993332147937412, "tokens_p.mean_in_band": 0.5126953125, "tokens_rate.above_band": 0.9971631205673759, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0028368794326241137 }, { "epoch": 2.7652322113336174, "grad_norm": 308.8120839830239, "learning_rate": 3.380447957876815e-07, "loss": 0.3048, "step": 12980, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8807339449541285, "success_rate.epoch.env.agentgym:sciworld": 0.9700854700854701, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9548192771084337, "success_rate.epoch.env.logic": 0.9063670411985019, "success_rate.epoch.env.math": 0.9743463246176616, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8734853884533144, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.875593675423189, "success_rate.epoch.global": 0.9163179916317992, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980904817861339, "tokens_p.mean_in_band": 0.7134046052631579, "tokens_rate.above_band": 0.9781609195402299, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021839080459770115 }, { "epoch": 2.7662974009373666, "grad_norm": 20.06048388118293, "learning_rate": 3.380162596532939e-07, "loss": 0.5705, "step": 12985, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8767123287671232, "success_rate.epoch.env.agentgym:sciworld": 0.9700854700854701, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.954954954954955, "success_rate.epoch.env.logic": 0.9064546304957904, "success_rate.epoch.env.math": 0.9744094488188977, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8732193732193733, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8752299263253799, "success_rate.epoch.global": 0.9161559888579387, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992523923444976, "tokens_p.mean_in_band": 0.4013671875, "tokens_rate.above_band": 0.9858490566037735, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014150943396226415 }, { "epoch": 2.7673625905411163, "grad_norm": 69.499480929879, "learning_rate": 3.379877395404982e-07, "loss": 0.2843, "step": 12990, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8767123287671232, "success_rate.epoch.env.agentgym:sciworld": 0.9700854700854701, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.954954954954955, "success_rate.epoch.env.logic": 0.9058713886300093, "success_rate.epoch.env.math": 0.9744597249508841, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8733096085409253, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8751896781060851, "success_rate.epoch.global": 0.9161335187760778, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9999484323432343, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9742765273311897, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02572347266881029 }, { "epoch": 2.768427780144866, "grad_norm": 444.4493964196311, "learning_rate": 3.3795923547007975e-07, "loss": 0.3772, "step": 12995, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8772727272727273, "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9550898203592815, "success_rate.epoch.env.logic": 0.9059590316573557, "success_rate.epoch.env.math": 0.9744973025993134, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8734447209384998, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8752881229111084, "success_rate.epoch.global": 0.91625, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9997165532879818, "tokens_p.mean_in_band": 0.830078125, "tokens_rate.above_band": 0.9954853273137697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004514672686230248 }, { "epoch": 2.7694929697486153, "grad_norm": 811.1075631010086, "learning_rate": 3.379307474628121e-07, "loss": 0.2975, "step": 13000, "success_rate.epoch.env.abd": 0.9840764331210191, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9702127659574468, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9553571428571429, "success_rate.epoch.env.logic": 0.9051162790697674, "success_rate.epoch.env.math": 0.9744973025993134, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8736692689850958, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8749472378709281, "success_rate.epoch.global": 0.9160887656033287, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9999617737003058, "tokens_p.mean_in_band": 0.7431640625, "tokens_rate.above_band": 0.9951308581862447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004869141813755326 }, { "epoch": 2.7705581593523645, "grad_norm": 285.1482322404372, "learning_rate": 3.379022755394571e-07, "loss": 0.181, "step": 13005, "success_rate.epoch.env.abd": 0.9841269841269841, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9553571428571429, "success_rate.epoch.env.logic": 0.9053803339517625, "success_rate.epoch.env.math": 0.9745222929936306, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8738036157390996, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8750017978827174, "success_rate.epoch.global": 0.9162049861495845, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972718253968254, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.771623348956114, "grad_norm": 37.65000953425945, "learning_rate": 3.3787381972076493e-07, "loss": 0.3427, "step": 13010, "success_rate.epoch.env.abd": 0.9842271293375394, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9554896142433235, "success_rate.epoch.env.logic": 0.9053803339517625, "success_rate.epoch.env.math": 0.9745472344591287, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8740268931351733, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8750455101971091, "success_rate.epoch.global": 0.9163208852005532, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9956191588785047, "tokens_p.mean_in_band": 0.830078125, "tokens_rate.above_band": 0.981651376146789, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01834862385321101 }, { "epoch": 2.772688538559864, "grad_norm": 63.40570206532535, "learning_rate": 3.3784538002747393e-07, "loss": 0.2462, "step": 13015, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9791666666666666, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9554896142433235, "success_rate.epoch.env.logic": 0.9057301293900185, "success_rate.epoch.env.math": 0.9745969711773327, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8740714538379908, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8750903913863349, "success_rate.epoch.global": 0.9164364640883977, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986111111111111, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9926470588235294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007352941176470588 }, { "epoch": 2.773753728163613, "grad_norm": 332.560713721092, "learning_rate": 3.3781695648031073e-07, "loss": 0.3043, "step": 13020, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9556213017751479, "success_rate.epoch.env.logic": 0.9057301293900185, "success_rate.epoch.env.math": 0.9746465138956607, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8738961497703992, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8751295819545702, "success_rate.epoch.global": 0.9164137931034483, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984884332281808, "tokens_p.mean_in_band": 0.7177734375, "tokens_rate.above_band": 0.9958115183246073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004188481675392671 }, { "epoch": 2.7748189177673623, "grad_norm": 140.19483916508162, "learning_rate": 3.377885490999902e-07, "loss": 0.47, "step": 13025, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9556213017751479, "success_rate.epoch.env.logic": 0.9057301293900185, "success_rate.epoch.env.math": 0.9747204666990763, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8740740740740741, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8751524798733965, "success_rate.epoch.global": 0.9165289256198347, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919181034482759, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9914529914529915, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008547008547008548 }, { "epoch": 2.775884107371112, "grad_norm": 131.39135791827394, "learning_rate": 3.3776015790721555e-07, "loss": 0.2402, "step": 13030, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9556213017751479, "success_rate.epoch.env.logic": 0.9058171745152355, "success_rate.epoch.env.math": 0.9747695293546822, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8742957746478873, "success_rate.epoch.env.webshop": 0.975, "success_rate.epoch.env_macro_mean": 0.8751850079056364, "success_rate.epoch.global": 0.9166437414030262, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9921875, "tokens_p.mean_in_band": 0.8658854166666666, "tokens_rate.above_band": 0.9538461538461539, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046153846153846156 }, { "epoch": 2.7769492969748617, "grad_norm": 101.11363122012816, "learning_rate": 3.37731782922678e-07, "loss": 0.1789, "step": 13035, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8738738738738738, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9556213017751479, "success_rate.epoch.env.logic": 0.9059040590405905, "success_rate.epoch.env.math": 0.974818401937046, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8744725738396625, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8752688544871866, "success_rate.epoch.global": 0.9167582417582417, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981553819444444, "tokens_p.mean_in_band": 0.798828125, "tokens_rate.above_band": 0.993103448275862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006896551724137931 }, { "epoch": 2.778014486578611, "grad_norm": 292.06019742006487, "learning_rate": 3.3770342416705697e-07, "loss": 0.214, "step": 13040, "success_rate.epoch.env.abd": 0.9842767295597484, "success_rate.epoch.env.agentgym:alfworld": 0.8699551569506726, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.9743589743589743, "success_rate.epoch.env.ded": 0.9556213017751479, "success_rate.epoch.env.logic": 0.906163753449862, "success_rate.epoch.env.math": 0.9748670855485742, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8745607870695713, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8749486612442329, "success_rate.epoch.global": 0.9167352537722908, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988155976676385, "tokens_p.mean_in_band": 0.6432291666666666, "tokens_rate.above_band": 0.9913294797687862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008670520231213872 }, { "epoch": 2.7790796761823606, "grad_norm": 110.62272083868578, "learning_rate": 3.376750816610202e-07, "loss": 0.2493, "step": 13045, "success_rate.epoch.env.abd": 0.9843260188087775, "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9556213017751479, "success_rate.epoch.env.logic": 0.90625, "success_rate.epoch.env.math": 0.9749155812831645, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8746048472075869, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8751327586508908, "success_rate.epoch.global": 0.9168493150684931, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986013427109974, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.78014486578611, "grad_norm": 418.49247014166167, "learning_rate": 3.3764675542522355e-07, "loss": 0.4602, "step": 13050, "success_rate.epoch.env.abd": 0.9843260188087775, "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9556213017751479, "success_rate.epoch.env.logic": 0.9064220183486239, "success_rate.epoch.env.math": 0.9749638902262879, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8744300245527885, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.875136895436068, "success_rate.epoch.global": 0.9168262653898769, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9927591463414634, "tokens_p.mean_in_band": 0.59765625, "tokens_rate.above_band": 0.9213483146067416, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07865168539325842 }, { "epoch": 2.7812100553898595, "grad_norm": 107.4517307768208, "learning_rate": 3.376184454803108e-07, "loss": 0.2007, "step": 13055, "success_rate.epoch.env.abd": 0.9844236760124611, "success_rate.epoch.env.agentgym:alfworld": 0.8711111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9557522123893806, "success_rate.epoch.env.logic": 0.9065077910174152, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8745620182200421, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8751807539749472, "success_rate.epoch.global": 0.9169398907103825, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9978966346153846, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.782275244993609, "grad_norm": 50.84041348277853, "learning_rate": 3.375901518469142e-07, "loss": 0.2162, "step": 13060, "success_rate.epoch.env.abd": 0.9844236760124611, "success_rate.epoch.env.agentgym:alfworld": 0.8716814159292036, "success_rate.epoch.env.agentgym:sciworld": 0.9703389830508474, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.9065934065934066, "success_rate.epoch.env.math": 0.9745437079731027, "success_rate.epoch.env.sat": 0.1590909090909091, "success_rate.epoch.env.science": 0.8744316194473593, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8751988785337019, "success_rate.epoch.global": 0.9167803547066848, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.86, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967301324503312, "tokens_p.mean_in_band": 0.5295973557692307, "tokens_rate.above_band": 0.9830729166666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016927083333333332 }, { "epoch": 2.7833404345973585, "grad_norm": 43.417265940975334, "learning_rate": 3.3756187454565395e-07, "loss": 0.1949, "step": 13065, "success_rate.epoch.env.abd": 0.9844236760124611, "success_rate.epoch.env.agentgym:alfworld": 0.8716814159292036, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.9066788655077768, "success_rate.epoch.env.math": 0.9745803357314149, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8746070555361509, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8749159077333719, "success_rate.epoch.global": 0.9167574931880109, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971931137724551, "tokens_p.mean_in_band": 0.4967830882352941, "tokens_rate.above_band": 0.907608695652174, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09239130434782608 }, { "epoch": 2.7844056242011077, "grad_norm": 53.617423870435076, "learning_rate": 3.375336135971382e-07, "loss": 0.2513, "step": 13070, "success_rate.epoch.env.abd": 0.984472049689441, "success_rate.epoch.env.agentgym:alfworld": 0.8722466960352423, "success_rate.epoch.env.agentgym:sciworld": 0.9704641350210971, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.9068493150684932, "success_rate.epoch.env.math": 0.9746168582375478, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.874738311235171, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8750024424196344, "success_rate.epoch.global": 0.9168707482993197, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967225609756097, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9951456310679612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0048543689320388345 }, { "epoch": 2.7854708138048574, "grad_norm": 101.20777957042894, "learning_rate": 3.3750536902196345e-07, "loss": 0.4592, "step": 13075, "success_rate.epoch.env.abd": 0.984472049689441, "success_rate.epoch.env.agentgym:alfworld": 0.8728070175438597, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9795918367346939, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.9070191431175935, "success_rate.epoch.env.math": 0.9746532759445241, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8745207389334263, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8750636328956317, "success_rate.epoch.global": 0.9168478260869565, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988386824324325, "tokens_p.mean_in_band": 0.4890625, "tokens_rate.above_band": 0.9833887043189369, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016611295681063124 }, { "epoch": 2.7865360034086066, "grad_norm": 57.014725508159245, "learning_rate": 3.374771408407141e-07, "loss": 0.2191, "step": 13080, "success_rate.epoch.env.abd": 0.9845201238390093, "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, "success_rate.epoch.env.agentgym:sciworld": 0.9707112970711297, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9558823529411765, "success_rate.epoch.env.logic": 0.907103825136612, "success_rate.epoch.env.math": 0.9746895893027698, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8745644599303136, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8752174147410655, "success_rate.epoch.global": 0.9169606512890095, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0, "tokens_p.mean_in_band": 0.62109375, "tokens_rate.above_band": 0.9983079526226735, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001692047377326565 }, { "epoch": 2.7876011930123563, "grad_norm": 73.00569698653771, "learning_rate": 3.374489290739626e-07, "loss": 0.2837, "step": 13085, "success_rate.epoch.env.abd": 0.9845201238390093, "success_rate.epoch.env.agentgym:alfworld": 0.8733624454148472, "success_rate.epoch.env.agentgym:sciworld": 0.9707112970711297, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9560117302052786, "success_rate.epoch.env.logic": 0.9074410163339383, "success_rate.epoch.env.math": 0.9747137404580153, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8739993038635573, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8752106477910577, "success_rate.epoch.global": 0.9168021680216802, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.000561377245509, "tokens_p.mean_in_band": 0.6723090277777778, "tokens_rate.above_band": 0.9893364928909952, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01066350710900474 }, { "epoch": 2.7886663826161056, "grad_norm": 246.97176089096868, "learning_rate": 3.3742073374226966e-07, "loss": 0.2788, "step": 13090, "success_rate.epoch.env.abd": 0.9845201238390093, "success_rate.epoch.env.agentgym:alfworld": 0.8739130434782608, "success_rate.epoch.env.agentgym:sciworld": 0.9708333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9560117302052786, "success_rate.epoch.env.logic": 0.9057971014492754, "success_rate.epoch.env.math": 0.9747498808956646, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8737830319888734, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8751059739732323, "success_rate.epoch.global": 0.9165087956698241, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9961476824457594, "tokens_p.mean_in_band": 0.6041015625, "tokens_rate.above_band": 0.9806576402321083, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019342359767891684 }, { "epoch": 2.7897315722198552, "grad_norm": 58.973753591352065, "learning_rate": 3.373925548661836e-07, "loss": 0.2696, "step": 13095, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.8744588744588745, "success_rate.epoch.env.agentgym:sciworld": 0.970954356846473, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9560117302052786, "success_rate.epoch.env.logic": 0.9057971014492754, "success_rate.epoch.env.math": 0.9747619047619047, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8736549809094064, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8751647092511327, "success_rate.epoch.global": 0.9164864864864865, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993722098214286, "tokens_p.mean_in_band": 0.6015625, "tokens_rate.above_band": 0.9911504424778761, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008849557522123894 }, { "epoch": 2.7907967618236045, "grad_norm": 85.82667345074272, "learning_rate": 3.3736439246624113e-07, "loss": 0.1691, "step": 13100, "success_rate.epoch.env.abd": 0.9846153846153847, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.970954356846473, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.956140350877193, "success_rate.epoch.env.logic": 0.9057971014492754, "success_rate.epoch.env.math": 0.9747979077508322, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8738738738738738, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8752487676298994, "success_rate.epoch.global": 0.9165991902834008, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974075112107623, "tokens_p.mean_in_band": 0.4947916666666667, "tokens_rate.above_band": 0.9933184855233853, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0066815144766146995 }, { "epoch": 2.791861951427354, "grad_norm": 0.0, "learning_rate": 3.3733624656296675e-07, "loss": 0.1779, "step": 13105, "success_rate.epoch.env.abd": 0.9847094801223242, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.956140350877193, "success_rate.epoch.env.logic": 0.9059674502712477, "success_rate.epoch.env.math": 0.9748218527315915, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8740048459674628, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8752978026046335, "success_rate.epoch.global": 0.916711590296496, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986033519553073, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.7929271410311034, "grad_norm": 148.755769224436, "learning_rate": 3.3730811717687307e-07, "loss": 0.2582, "step": 13110, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.956268221574344, "success_rate.epoch.env.logic": 0.9059674502712477, "success_rate.epoch.env.math": 0.9748696064485538, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.8741790528862772, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8753338434020498, "success_rate.epoch.global": 0.9168236877523553, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959177927927928, "tokens_p.mean_in_band": 0.8236607142857143, "tokens_rate.above_band": 0.9844789356984479, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015521064301552107 }, { "epoch": 2.793992330634853, "grad_norm": 39.723803455467966, "learning_rate": 3.3728000432846045e-07, "loss": 0.33, "step": 13115, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9563953488372093, "success_rate.epoch.env.logic": 0.9053201082055906, "success_rate.epoch.env.math": 0.9749171793658306, "success_rate.epoch.env.sat": 0.15555555555555556, "success_rate.epoch.env.science": 0.87426597582038, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8752987780428305, "success_rate.epoch.global": 0.9168010752688172, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987116490891659, "tokens_p.mean_in_band": 0.571546052631579, "tokens_rate.above_band": 0.9821092278719398, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017890772128060263 }, { "epoch": 2.7950575202386023, "grad_norm": 67.07005303066418, "learning_rate": 3.3725190803821746e-07, "loss": 0.2349, "step": 13120, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9563953488372093, "success_rate.epoch.env.logic": 0.9053201082055906, "success_rate.epoch.env.math": 0.9744922059518186, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8744827586206897, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8749724295768506, "success_rate.epoch.global": 0.9166442953020134, "success_rate.window.env.math": 0.75, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9935661764705882, "tokens_p.mean_below_band": 4.731118679046631e-07, "tokens_p.mean_in_band": 0.6099175347222222, "tokens_rate.above_band": 0.8774193548387097, "tokens_rate.below_band": 0.0064516129032258064, "tokens_rate.in_band": 0.11612903225806452 }, { "epoch": 2.796122709842352, "grad_norm": 221.11689550649683, "learning_rate": 3.3722382832662037e-07, "loss": 0.2318, "step": 13125, "success_rate.epoch.env.abd": 0.9847560975609756, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9045904590459046, "success_rate.epoch.env.math": 0.9745403111739745, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.874267998622115, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8749024374274981, "success_rate.epoch.global": 0.9164879356568365, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992093373493975, "tokens_p.mean_in_band": 0.5407608695652174, "tokens_rate.above_band": 0.9730363423212193, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026963657678780773 }, { "epoch": 2.7971878994461012, "grad_norm": 263.2407026877318, "learning_rate": 3.3719576521413355e-07, "loss": 0.3875, "step": 13130, "success_rate.epoch.env.abd": 0.9848024316109423, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9046762589928058, "success_rate.epoch.env.math": 0.9745882352941176, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8740970072239422, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8749032616746653, "success_rate.epoch.global": 0.9164658634538153, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9927631578947368, "tokens_p.mean_in_band": 0.64013671875, "tokens_rate.above_band": 0.9223300970873787, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07766990291262135 }, { "epoch": 2.798253089049851, "grad_norm": 111.10386601497021, "learning_rate": 3.3716771872120914e-07, "loss": 0.2898, "step": 13135, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9710743801652892, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9031390134529148, "success_rate.epoch.env.math": 0.9746121297602257, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8742700103057368, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8747855985151699, "success_rate.epoch.global": 0.9163101604278074, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973282442748092, "tokens_p.mean_in_band": 0.487640380859375, "tokens_rate.above_band": 0.9761549925484352, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02384500745156483 }, { "epoch": 2.7993182786536, "grad_norm": 68.74999086321412, "learning_rate": 3.3713968886828727e-07, "loss": 0.2222, "step": 13140, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9711934156378601, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9031390134529148, "success_rate.epoch.env.math": 0.974659784138902, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8744855967078189, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8748203509018363, "success_rate.epoch.global": 0.9164218958611482, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948863636363636, "tokens_p.mean_in_band": 0.8388671875, "tokens_rate.above_band": 0.9763313609467456, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023668639053254437 }, { "epoch": 2.80038346825735, "grad_norm": 103.778267862368, "learning_rate": 3.371116756757959e-07, "loss": 0.1873, "step": 13145, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, "success_rate.epoch.env.agentgym:textcraft": 0.9803921568627451, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9565217391304348, "success_rate.epoch.env.logic": 0.9032258064516129, "success_rate.epoch.env.math": 0.9747191011235955, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8746145940390545, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8748560933642547, "success_rate.epoch.global": 0.9165333333333333, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948979591836735, "tokens_p.mean_in_band": 0.7864583333333334, "tokens_rate.above_band": 0.9607843137254902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0392156862745098 }, { "epoch": 2.8014486578610995, "grad_norm": 124.46891115703816, "learning_rate": 3.370836791641508e-07, "loss": 0.3429, "step": 13150, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9566473988439307, "success_rate.epoch.env.logic": 0.9033989266547406, "success_rate.epoch.env.math": 0.9747309312119794, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8744870041039672, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8749070109984736, "success_rate.epoch.global": 0.9165113182423436, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967401079136691, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.9893238434163701, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010676156583629894 }, { "epoch": 2.8025138474648488, "grad_norm": 523.9811714252888, "learning_rate": 3.370556993537556e-07, "loss": 0.299, "step": 13155, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8680851063829788, "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9566473988439307, "success_rate.epoch.env.logic": 0.903485254691689, "success_rate.epoch.env.math": 0.9747545582047685, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8743169398907104, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.874272919834788, "success_rate.epoch.global": 0.9162234042553191, "success_rate.window.env.agentgym:alfworld": 0.3333333333333333, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7708333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9995170015455951, "tokens_p.mean_in_band": 0.42578125, "tokens_rate.above_band": 0.9817905918057663, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018209408194233688 }, { "epoch": 2.803579037068598, "grad_norm": 157.03777837965075, "learning_rate": 3.3702773626500187e-07, "loss": 0.2703, "step": 13160, "success_rate.epoch.env.abd": 0.9848484848484849, "success_rate.epoch.env.agentgym:alfworld": 0.8686440677966102, "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9566473988439307, "success_rate.epoch.env.logic": 0.9026785714285714, "success_rate.epoch.env.math": 0.974766355140187, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8742759795570698, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8742477484486326, "success_rate.epoch.global": 0.9160690571049137, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0003367456896552, "tokens_p.mean_in_band": 0.5755208333333334, "tokens_rate.above_band": 0.9809725158562368, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019027484143763214 }, { "epoch": 2.8046442266723477, "grad_norm": 157.95892369019975, "learning_rate": 3.369997899182689e-07, "loss": 0.2342, "step": 13165, "success_rate.epoch.env.abd": 0.984984984984985, "success_rate.epoch.env.agentgym:alfworld": 0.8686440677966102, "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9566473988439307, "success_rate.epoch.env.logic": 0.9028520499108734, "success_rate.epoch.env.math": 0.9747899159663865, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8744043567052416, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8742897408661939, "success_rate.epoch.global": 0.9161803713527852, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995697463768116, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.8057094162760974, "grad_norm": 81.05017243267453, "learning_rate": 3.369718603339237e-07, "loss": 0.1975, "step": 13170, "success_rate.epoch.env.abd": 0.9850299401197605, "success_rate.epoch.env.agentgym:alfworld": 0.8686440677966102, "success_rate.epoch.env.agentgym:sciworld": 0.9713114754098361, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9567723342939481, "success_rate.epoch.env.logic": 0.9029385574354408, "success_rate.epoch.env.math": 0.9748251748251748, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8742352141400408, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8743008785391893, "success_rate.epoch.global": 0.916158940397351, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970024807056229, "tokens_p.mean_in_band": 0.4124348958333333, "tokens_rate.above_band": 0.9869423286180631, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013057671381936888 }, { "epoch": 2.8067746058798466, "grad_norm": 170.68941697109344, "learning_rate": 3.369439475323212e-07, "loss": 0.3088, "step": 13175, "success_rate.epoch.env.abd": 0.9850299401197605, "success_rate.epoch.env.agentgym:alfworld": 0.8686440677966102, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9030249110320284, "success_rate.epoch.env.math": 0.9748720335039553, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8740237691001698, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8740544710361011, "success_rate.epoch.global": 0.916005291005291, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.993224358049625, "tokens_p.mean_in_band": 0.5604903796487604, "tokens_rate.above_band": 0.8774683544303797, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12253164556962025 }, { "epoch": 2.807839795483596, "grad_norm": 97.06331843866808, "learning_rate": 3.36916051533804e-07, "loss": 0.2659, "step": 13180, "success_rate.epoch.env.abd": 0.9850746268656716, "success_rate.epoch.env.agentgym:alfworld": 0.869198312236287, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9540229885057471, "success_rate.epoch.env.logic": 0.9023957409050577, "success_rate.epoch.env.math": 0.9748953974895398, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8741519674355496, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.874065500434245, "success_rate.epoch.global": 0.9159841479524439, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994340232858991, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9948519948519948, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005148005148005148 }, { "epoch": 2.8089049850873455, "grad_norm": 32.002691076004496, "learning_rate": 3.368881723587027e-07, "loss": 0.2187, "step": 13185, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.869198312236287, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9541547277936963, "success_rate.epoch.env.logic": 0.9025686448184234, "success_rate.epoch.env.math": 0.9749187180678124, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8742799051169096, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8741149985563403, "success_rate.epoch.global": 0.9160949868073879, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9991554054054054, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9966329966329966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003367003367003367 }, { "epoch": 2.8099701746910952, "grad_norm": 54.994817616911064, "learning_rate": 3.368603100273352e-07, "loss": 0.1712, "step": 13190, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.869198312236287, "success_rate.epoch.env.agentgym:sciworld": 0.9714285714285714, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9541547277936963, "success_rate.epoch.env.logic": 0.9027409372236959, "success_rate.epoch.env.math": 0.9749768303985171, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8744075829383886, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8741475515161092, "success_rate.epoch.global": 0.916205533596838, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9898255813953488, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9772727272727273, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022727272727272728 }, { "epoch": 2.8110353642948445, "grad_norm": 69.48728142410503, "learning_rate": 3.368324645600075e-07, "loss": 0.2032, "step": 13195, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.869198312236287, "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9541547277936963, "success_rate.epoch.env.logic": 0.9028268551236749, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8746621621621622, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8741911706746379, "success_rate.epoch.global": 0.9163157894736842, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955658783783784, "tokens_p.mean_in_band": 0.845703125, "tokens_rate.above_band": 0.9367088607594937, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06329113924050633 }, { "epoch": 2.8121005538985937, "grad_norm": 130.95813450468498, "learning_rate": 3.3680463597701315e-07, "loss": 0.2187, "step": 13200, "success_rate.epoch.env.abd": 0.9851632047477745, "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9541547277936963, "success_rate.epoch.env.logic": 0.9029126213592233, "success_rate.epoch.env.math": 0.9750462107208873, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8741565452091767, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.873825194879598, "success_rate.epoch.global": 0.9160315374507227, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975490196078431, "tokens_p.mean_in_band": 0.6214384191176471, "tokens_rate.above_band": 0.9375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0625 }, { "epoch": 2.8131657435023434, "grad_norm": 145.91243768394122, "learning_rate": 3.3677682429863337e-07, "loss": 0.2175, "step": 13205, "success_rate.epoch.env.abd": 0.985207100591716, "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9541547277936963, "success_rate.epoch.env.logic": 0.9030837004405287, "success_rate.epoch.env.math": 0.9750692520775623, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8743684742337487, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8738660989983703, "success_rate.epoch.global": 0.9161417322834645, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9891098484848485, "tokens_p.mean_in_band": 0.7740885416666666, "tokens_rate.above_band": 0.9565217391304348, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043478260869565216 }, { "epoch": 2.814230933106093, "grad_norm": 183.15006531114483, "learning_rate": 3.367490295451372e-07, "loss": 0.3595, "step": 13210, "success_rate.epoch.env.abd": 0.9852941176470589, "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, "success_rate.epoch.env.agentgym:sciworld": 0.9715447154471545, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9541547277936963, "success_rate.epoch.env.logic": 0.903169014084507, "success_rate.epoch.env.math": 0.9751152073732718, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8744952893674294, "success_rate.epoch.env.webshop": 0.975609756097561, "success_rate.epoch.env_macro_mean": 0.8738974718282528, "success_rate.epoch.global": 0.9162516382699869, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970238095238095, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.963302752293578, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03669724770642202 }, { "epoch": 2.8152961227098423, "grad_norm": 265.81524979643154, "learning_rate": 3.367212517367812e-07, "loss": 0.2729, "step": 13215, "success_rate.epoch.env.abd": 0.9853372434017595, "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9542857142857143, "success_rate.epoch.env.logic": 0.9032541776605101, "success_rate.epoch.env.math": 0.9751381215469613, "success_rate.epoch.env.sat": 0.15217391304347827, "success_rate.epoch.env.science": 0.8742857142857143, "success_rate.epoch.env.webshop": 0.9761904761904762, "success_rate.epoch.env_macro_mean": 0.8739673389726892, "success_rate.epoch.global": 0.9162303664921466, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9523809523809523, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941052971576227, "tokens_p.mean_in_band": 0.8032924107142857, "tokens_rate.above_band": 0.9910371318822023, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008962868117797696 }, { "epoch": 2.8163613123135915, "grad_norm": 123.04272163621596, "learning_rate": 3.3669349089380964e-07, "loss": 0.3237, "step": 13220, "success_rate.epoch.env.abd": 0.9853801169590644, "success_rate.epoch.env.agentgym:alfworld": 0.865546218487395, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9542857142857143, "success_rate.epoch.env.logic": 0.9034240561896401, "success_rate.epoch.env.math": 0.9751495628163829, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8744545149378986, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.875693298679784, "success_rate.epoch.global": 0.9163398692810457, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997976618705036, "tokens_p.mean_in_band": 0.796875, "tokens_rate.above_band": 0.9952267303102625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00477326968973747 }, { "epoch": 2.8174265019173412, "grad_norm": 222.77855107590787, "learning_rate": 3.3666574703645443e-07, "loss": 0.1651, "step": 13225, "success_rate.epoch.env.abd": 0.9854651162790697, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975, "success_rate.epoch.env.ded": 0.9542857142857143, "success_rate.epoch.env.logic": 0.9035933391761612, "success_rate.epoch.env.math": 0.9751609935602575, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8745808182427901, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8758307954556535, "success_rate.epoch.global": 0.9164490861618799, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970664928292047, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.818491691521091, "grad_norm": 81.78920043921914, "learning_rate": 3.366380201849351e-07, "loss": 0.1587, "step": 13230, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9807692307692307, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9542857142857143, "success_rate.epoch.env.logic": 0.9037620297462817, "success_rate.epoch.env.math": 0.9751952227836472, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8747068676716918, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8759199641299914, "success_rate.epoch.global": 0.9165580182529335, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980244252873564, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.81955688112484, "grad_norm": 252.80702348321032, "learning_rate": 3.3661031035945877e-07, "loss": 0.2815, "step": 13235, "success_rate.epoch.env.abd": 0.9855072463768116, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9542857142857143, "success_rate.epoch.env.logic": 0.9038461538461539, "success_rate.epoch.env.math": 0.9752407152682255, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8748745399799264, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.875979976275005, "success_rate.epoch.global": 0.9166666666666666, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965494791666667, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9917355371900827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008264462809917356 }, { "epoch": 2.82062207072859, "grad_norm": 49.47513756336224, "learning_rate": 3.3658261758022014e-07, "loss": 0.1779, "step": 13240, "success_rate.epoch.env.abd": 0.9855491329479769, "success_rate.epoch.env.agentgym:alfworld": 0.8666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.97165991902834, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9542857142857143, "success_rate.epoch.env.logic": 0.9039301310043668, "success_rate.epoch.env.math": 0.9752633989922126, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8751252086811352, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8760162686526936, "success_rate.epoch.global": 0.9167750325097529, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9909156976744186, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9608938547486033, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03910614525139665 }, { "epoch": 2.821687260332339, "grad_norm": 143.94319400678518, "learning_rate": 3.365549418674016e-07, "loss": 0.2713, "step": 13245, "success_rate.epoch.env.abd": 0.9855491329479769, "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9542857142857143, "success_rate.epoch.env.logic": 0.9040139616055847, "success_rate.epoch.env.math": 0.9753086419753086, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8752085418752086, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8761461422593382, "success_rate.epoch.global": 0.9168831168831169, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979762801204819, "tokens_p.mean_in_band": 0.8697916666666666, "tokens_rate.above_band": 0.9955022488755623, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004497751124437781 }, { "epoch": 2.8227524499360888, "grad_norm": 98.5871544151332, "learning_rate": 3.365272832411729e-07, "loss": 0.174, "step": 13250, "success_rate.epoch.env.abd": 0.9855907780979827, "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9811320754716981, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9544159544159544, "success_rate.epoch.env.logic": 0.9040139616055847, "success_rate.epoch.env.math": 0.9753537197626655, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8753748750416528, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8761809873715241, "success_rate.epoch.global": 0.9169909208819714, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981231231231231, "tokens_p.mean_in_band": 0.279296875, "tokens_rate.above_band": 0.9970059880239521, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0029940119760479044 }, { "epoch": 2.823817639539838, "grad_norm": 156.31417488362882, "learning_rate": 3.364996417216915e-07, "loss": 0.1741, "step": 13255, "success_rate.epoch.env.abd": 0.9855907780979827, "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, "success_rate.epoch.env.agentgym:sciworld": 0.9717741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9544159544159544, "success_rate.epoch.env.logic": 0.9040976460331299, "success_rate.epoch.env.math": 0.9753761969904241, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8756235450615231, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8762450089792477, "success_rate.epoch.global": 0.917098445595855, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9950911640953717, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9944211994421199, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005578800557880056 }, { "epoch": 2.8248828291435877, "grad_norm": 172.39001359867987, "learning_rate": 3.364720173291023e-07, "loss": 0.1901, "step": 13260, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9545454545454546, "success_rate.epoch.env.logic": 0.9043478260869565, "success_rate.epoch.env.math": 0.9754098360655737, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8756648936170213, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8763004117316524, "success_rate.epoch.global": 0.9172056921086675, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968477584059776, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.825948018747337, "grad_norm": 73.59427325440687, "learning_rate": 3.3644441008353785e-07, "loss": 0.1672, "step": 13265, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8677685950413223, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9546742209631728, "success_rate.epoch.env.logic": 0.9044309296264118, "success_rate.epoch.env.math": 0.9754545454545455, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8758300132802125, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8763387480052285, "success_rate.epoch.global": 0.917312661498708, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988195825049702, "tokens_p.mean_in_band": 0.7200520833333334, "tokens_rate.above_band": 0.9940711462450593, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005928853754940711 }, { "epoch": 2.8270132083510866, "grad_norm": 70.71758750163254, "learning_rate": 3.364168200051181e-07, "loss": 0.1778, "step": 13270, "success_rate.epoch.env.abd": 0.985632183908046, "success_rate.epoch.env.agentgym:alfworld": 0.8683127572016461, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9546742209631728, "success_rate.epoch.env.logic": 0.9045967042497832, "success_rate.epoch.env.math": 0.9754879709487063, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8759946949602122, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8764212974559428, "success_rate.epoch.global": 0.9174193548387096, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979723282442748, "tokens_p.mean_in_band": 0.7174479166666666, "tokens_rate.above_band": 0.9886792452830189, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011320754716981131 }, { "epoch": 2.828078397954836, "grad_norm": 53.306510520850956, "learning_rate": 3.3638924711395037e-07, "loss": 0.2671, "step": 13275, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8688524590163934, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9546742209631728, "success_rate.epoch.env.logic": 0.9045967042497832, "success_rate.epoch.env.math": 0.9755213055303718, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8761999337967561, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8764957923432557, "success_rate.epoch.global": 0.9175257731958762, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987359550561797, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9933035714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006696428571428571 }, { "epoch": 2.8291435875585855, "grad_norm": 158.25389435637865, "learning_rate": 3.363616914301297e-07, "loss": 0.4428, "step": 13280, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8688524590163934, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9548022598870056, "success_rate.epoch.env.logic": 0.9045967042497832, "success_rate.epoch.env.math": 0.9755434782608695, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8764861294583883, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8765354657356159, "success_rate.epoch.global": 0.9176319176319176, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.993455497382199, "tokens_p.mean_in_band": 0.7247242647058824, "tokens_rate.above_band": 0.9182692307692307, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08173076923076923 }, { "epoch": 2.8302087771623348, "grad_norm": 95.88155796255984, "learning_rate": 3.3633415297373846e-07, "loss": 0.2301, "step": 13285, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8688524590163934, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9549295774647887, "success_rate.epoch.env.logic": 0.9046793760831889, "success_rate.epoch.env.math": 0.9751356238698011, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8763192612137203, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8765023081697477, "success_rate.epoch.global": 0.9174807197943444, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969827586206896, "tokens_p.mean_in_band": 0.6126302083333334, "tokens_rate.above_band": 0.9797297297297297, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02027027027027027 }, { "epoch": 2.8312739667660844, "grad_norm": 52.86354257532786, "learning_rate": 3.363066317648465e-07, "loss": 0.2282, "step": 13290, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9549295774647887, "success_rate.epoch.env.logic": 0.9047619047619048, "success_rate.epoch.env.math": 0.9751693002257337, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8761936121172209, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8765501128991839, "success_rate.epoch.global": 0.9174582798459564, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953758949880668, "tokens_p.mean_in_band": 0.72578125, "tokens_rate.above_band": 0.9766899766899767, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023310023310023312 }, { "epoch": 2.8323391563698337, "grad_norm": 54.79103491602769, "learning_rate": 3.3627912782351095e-07, "loss": 0.1924, "step": 13295, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9549295774647887, "success_rate.epoch.env.logic": 0.9049265341400173, "success_rate.epoch.env.math": 0.975214060387562, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8760276224926011, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8765540583460312, "success_rate.epoch.global": 0.9174358974358975, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9928728070175439, "tokens_p.mean_in_band": 0.5283203125, "tokens_rate.above_band": 0.9661016949152542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03389830508474576 }, { "epoch": 2.8334043459735834, "grad_norm": 124.81086349279724, "learning_rate": 3.362516411697766e-07, "loss": 0.2107, "step": 13300, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.905090595340811, "success_rate.epoch.env.math": 0.9748088169140801, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.876149802890933, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8765547492947707, "success_rate.epoch.global": 0.9174135723431498, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0002172653534183, "tokens_p.mean_below_band": 5.029141902923584e-07, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9953863898500577, "tokens_rate.below_band": 0.0011534025374855825, "tokens_rate.in_band": 0.0034602076124567475 }, { "epoch": 2.8344695355773326, "grad_norm": 89.37378216017451, "learning_rate": 3.3622417182367556e-07, "loss": 0.247, "step": 13305, "success_rate.epoch.env.abd": 0.9856733524355301, "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9550561797752809, "success_rate.epoch.env.logic": 0.9044750430292599, "success_rate.epoch.env.math": 0.9748540637629097, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8759435510338037, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8764841531747845, "success_rate.epoch.global": 0.9172634271099744, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954323444283647, "tokens_p.mean_in_band": 0.6072048611111112, "tokens_rate.above_band": 0.9504814305364512, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04951856946354883 }, { "epoch": 2.8355347251810823, "grad_norm": 121.04398174470911, "learning_rate": 3.3619671980522723e-07, "loss": 0.1841, "step": 13310, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, "success_rate.epoch.env.agentgym:sciworld": 0.9718875502008032, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9551820728291317, "success_rate.epoch.env.logic": 0.9037800687285223, "success_rate.epoch.env.math": 0.9748878923766816, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8760655737704918, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8764503080277236, "success_rate.epoch.global": 0.9172413793103448, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998471882640587, "tokens_p.mean_in_band": 0.69375, "tokens_rate.above_band": 0.9939246658566221, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006075334143377886 }, { "epoch": 2.8365999147848315, "grad_norm": 382.8600877425378, "learning_rate": 3.361692851344384e-07, "loss": 0.2076, "step": 13315, "success_rate.epoch.env.abd": 0.9857142857142858, "success_rate.epoch.env.agentgym:alfworld": 0.8693877551020408, "success_rate.epoch.env.agentgym:sciworld": 0.972, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.975609756097561, "success_rate.epoch.env.ded": 0.9553072625698324, "success_rate.epoch.env.logic": 0.903862660944206, "success_rate.epoch.env.math": 0.974910394265233, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8762684124386252, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8764999055106565, "success_rate.epoch.global": 0.9173469387755102, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964519650655022, "tokens_p.mean_in_band": 0.76953125, "tokens_rate.above_band": 0.9870689655172413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01293103448275862 }, { "epoch": 2.837665104388581, "grad_norm": 29.056420800762854, "learning_rate": 3.361418678313033e-07, "loss": 0.3414, "step": 13320, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8704453441295547, "success_rate.epoch.env.agentgym:sciworld": 0.9721115537848606, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9553072625698324, "success_rate.epoch.env.logic": 0.903862660944206, "success_rate.epoch.env.math": 0.9749440715883669, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8763493621197253, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8766731045970407, "success_rate.epoch.global": 0.9174522292993631, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998320895522388, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.9985096870342772, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0014903129657228018 }, { "epoch": 2.838730293992331, "grad_norm": 54.592933557964905, "learning_rate": 3.361144679158035e-07, "loss": 0.1603, "step": 13325, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8704453441295547, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9553072625698324, "success_rate.epoch.env.logic": 0.9040274207369323, "success_rate.epoch.env.math": 0.9749664729548503, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8765512736773351, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8767185356110571, "success_rate.epoch.global": 0.917557251908397, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984504132231405, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.83979548359608, "grad_norm": 230.25757852382318, "learning_rate": 3.3608708540790793e-07, "loss": 0.628, "step": 13330, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8669354838709677, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9554317548746518, "success_rate.epoch.env.logic": 0.9032534246575342, "success_rate.epoch.env.math": 0.9749776586237712, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8767926988265972, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.876363376227877, "success_rate.epoch.global": 0.9174078780177891, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980432780847146, "tokens_p.mean_in_band": 0.6573893229166666, "tokens_rate.above_band": 0.9576719576719577, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042328042328042326 }, { "epoch": 2.8408606731998294, "grad_norm": 68.79339207379915, "learning_rate": 3.3605972032757257e-07, "loss": 0.1025, "step": 13335, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8674698795180723, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9033361847733106, "success_rate.epoch.env.math": 0.9750223015165032, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8769130576359492, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8764457360593195, "success_rate.epoch.global": 0.9175126903553299, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.0006009615384615, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.841925862803579, "grad_norm": 0.0, "learning_rate": 3.3603237269474103e-07, "loss": 0.3074, "step": 13340, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.868, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9035012809564474, "success_rate.epoch.env.math": 0.9750334373606777, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8765030874228145, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.87647267990442, "success_rate.epoch.global": 0.9173637515842838, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971046325878594, "tokens_p.mean_in_band": 0.6453683035714286, "tokens_rate.above_band": 0.9178885630498533, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08211143695014662 }, { "epoch": 2.8429910524073287, "grad_norm": 74.57926843853564, "learning_rate": 3.3600504252934405e-07, "loss": 0.1891, "step": 13345, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8690476190476191, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9814814814814815, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9555555555555556, "success_rate.epoch.env.logic": 0.9027303754266212, "success_rate.epoch.env.math": 0.97506678539626, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8763388510223953, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8764859367365069, "success_rate.epoch.global": 0.9172151898734178, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994127101879328, "tokens_p.mean_below_band": 6.891787052154541e-07, "tokens_p.mean_in_band": 0.5518092105263158, "tokens_rate.above_band": 0.9806013579049466, "tokens_rate.below_band": 0.0009699321047526673, "tokens_rate.in_band": 0.01842870999030068 }, { "epoch": 2.844056242011078, "grad_norm": 1.1001882353835348, "learning_rate": 3.359777298512996e-07, "loss": 0.2088, "step": 13350, "success_rate.epoch.env.abd": 0.9857549857549858, "success_rate.epoch.env.agentgym:alfworld": 0.8656126482213439, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9556786703601108, "success_rate.epoch.env.logic": 0.9028132992327366, "success_rate.epoch.env.math": 0.9750778816199377, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8762151652624757, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8762127711533118, "success_rate.epoch.global": 0.9170670037926675, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7999999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9990633514986376, "tokens_p.mean_below_band": 6.007030606269836e-08, "tokens_p.mean_in_band": 0.7, "tokens_rate.above_band": 0.9839142091152815, "tokens_rate.below_band": 0.002680965147453083, "tokens_rate.in_band": 0.013404825737265416 }, { "epoch": 2.8451214316148272, "grad_norm": 606.8623382270667, "learning_rate": 3.35950434680513e-07, "loss": 0.201, "step": 13355, "success_rate.epoch.env.abd": 0.9857954545454546, "success_rate.epoch.env.agentgym:alfworld": 0.8656126482213439, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9761904761904762, "success_rate.epoch.env.ded": 0.9556786703601108, "success_rate.epoch.env.logic": 0.9028132992327366, "success_rate.epoch.env.math": 0.9751221679253665, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8764153995470721, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8762386792788113, "success_rate.epoch.global": 0.9171717171717172, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9938668224299065, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.846186621218577, "grad_norm": 84.20516969002915, "learning_rate": 3.359231570368768e-07, "loss": 0.1273, "step": 13360, "success_rate.epoch.env.abd": 0.9857954545454546, "success_rate.epoch.env.agentgym:alfworld": 0.8656126482213439, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9556786703601108, "success_rate.epoch.env.logic": 0.9029787234042553, "success_rate.epoch.env.math": 0.9751552795031055, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8765751211631664, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8763215853898465, "success_rate.epoch.global": 0.9172761664564943, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975079744816587, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9874015748031496, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012598425196850394 }, { "epoch": 2.8472518108223266, "grad_norm": 62.607543112608894, "learning_rate": 3.3589589694027076e-07, "loss": 0.1702, "step": 13365, "success_rate.epoch.env.abd": 0.9858356940509915, "success_rate.epoch.env.agentgym:alfworld": 0.8656126482213439, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9558011049723757, "success_rate.epoch.env.logic": 0.903143585386576, "success_rate.epoch.env.math": 0.974756421612046, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8766548272521796, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8763223475987623, "success_rate.epoch.global": 0.9172544080604534, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981132075471698, "tokens_p.mean_below_band": 1.318767317570746e-10, "tokens_rate.above_band": 0.9962406015037594, "tokens_rate.below_band": 0.0037593984962406013, "tokens_rate.in_band": 0.0 }, { "epoch": 2.848317000426076, "grad_norm": 68.99032333810347, "learning_rate": 3.358686544105618e-07, "loss": 0.349, "step": 13370, "success_rate.epoch.env.abd": 0.9858757062146892, "success_rate.epoch.env.agentgym:alfworld": 0.8661417322834646, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9559228650137741, "success_rate.epoch.env.logic": 0.902376910016978, "success_rate.epoch.env.math": 0.9747675962815405, "success_rate.epoch.env.sat": 0.1702127659574468, "success_rate.epoch.env.science": 0.8762088974854932, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8762759317624376, "success_rate.epoch.global": 0.9169811320754717, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9994916523972602, "tokens_p.mean_in_band": 0.6077745225694444, "tokens_rate.above_band": 0.984822934232715, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01517706576728499 }, { "epoch": 2.849382190029825, "grad_norm": 5.639069075887996, "learning_rate": 3.358414294676041e-07, "loss": 0.2665, "step": 13375, "success_rate.epoch.env.abd": 0.9859154929577465, "success_rate.epoch.env.agentgym:alfworld": 0.8661417322834646, "success_rate.epoch.env.agentgym:sciworld": 0.9722222222222222, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9532967032967034, "success_rate.epoch.env.logic": 0.9017781541066893, "success_rate.epoch.env.math": 0.9748010610079576, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8762487914921044, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8756706707222509, "success_rate.epoch.global": 0.9167085427135678, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.611111111111111, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9930524800708592, "tokens_p.mean_in_band": 0.7182348901098901, "tokens_rate.above_band": 0.9254098360655738, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07459016393442623 }, { "epoch": 2.8504473796335748, "grad_norm": 152.46141799171193, "learning_rate": 3.3581422213123897e-07, "loss": 0.3786, "step": 13380, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8661417322834646, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9532967032967034, "success_rate.epoch.env.logic": 0.9017781541066893, "success_rate.epoch.env.math": 0.9748344370860927, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8764478764478765, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8757053814353408, "success_rate.epoch.global": 0.9168130489335006, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959239130434783, "tokens_p.mean_in_band": 0.8072916666666666, "tokens_rate.above_band": 0.9787234042553191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02127659574468085 }, { "epoch": 2.8515125692373244, "grad_norm": 69.35748201714661, "learning_rate": 3.357870324212949e-07, "loss": 0.1599, "step": 13385, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8661417322834646, "success_rate.epoch.env.agentgym:sciworld": 0.9723320158102767, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9534246575342465, "success_rate.epoch.env.logic": 0.9018612521150592, "success_rate.epoch.env.math": 0.9748566387295986, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8766859344894027, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8757482279745177, "success_rate.epoch.global": 0.9169172932330827, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953775038520801, "tokens_p.mean_in_band": 0.8331473214285714, "tokens_rate.above_band": 0.989329268292683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010670731707317074 }, { "epoch": 2.8525777588410737, "grad_norm": 44.45150645831097, "learning_rate": 3.357598603575877e-07, "loss": 0.2542, "step": 13390, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.8671875, "success_rate.epoch.env.agentgym:sciworld": 0.9724409448818898, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.9010989010989011, "success_rate.epoch.env.math": 0.9748566387295986, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.876883616543764, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8755650500129952, "success_rate.epoch.global": 0.9167709637046307, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989785992217899, "tokens_p.mean_in_band": 0.5740831163194444, "tokens_rate.above_band": 0.9727479182437547, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02725208175624527 }, { "epoch": 2.853642948444823, "grad_norm": 94.03469740590714, "learning_rate": 3.3573270595992e-07, "loss": 0.3534, "step": 13395, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, "success_rate.epoch.env.agentgym:sciworld": 0.9724409448818898, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9508196721311475, "success_rate.epoch.env.logic": 0.90042194092827, "success_rate.epoch.env.math": 0.9749009247027741, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8770019218449712, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8755652692183946, "success_rate.epoch.global": 0.91675, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990327380952381, "tokens_p.mean_in_band": 0.4375, "tokens_rate.above_band": 0.975609756097561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024390243902439025 }, { "epoch": 2.8547081380485726, "grad_norm": 116.54053470997088, "learning_rate": 3.3570556924808187e-07, "loss": 0.1804, "step": 13400, "success_rate.epoch.env.abd": 0.9859550561797753, "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9510869565217391, "success_rate.epoch.env.logic": 0.9005059021922428, "success_rate.epoch.env.math": 0.9749119718309859, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8771985929005437, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8756259090878765, "success_rate.epoch.global": 0.9168539325842696, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9958558863328822, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9986486486486487, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0013513513513513514 }, { "epoch": 2.8557733276523223, "grad_norm": 183.97816327182832, "learning_rate": 3.3567845024185023e-07, "loss": 0.1388, "step": 13405, "success_rate.epoch.env.abd": 0.9859943977591037, "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9510869565217391, "success_rate.epoch.env.logic": 0.9005897219882055, "success_rate.epoch.env.math": 0.9749560632688928, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8773554774832322, "success_rate.epoch.env.webshop": 0.9767441860465116, "success_rate.epoch.env_macro_mean": 0.8756553761238663, "success_rate.epoch.global": 0.9169576059850374, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919270833333333, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9836065573770492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01639344262295082 }, { "epoch": 2.8568385172560715, "grad_norm": 178.02878990784518, "learning_rate": 3.3565134896098936e-07, "loss": 0.2687, "step": 13410, "success_rate.epoch.env.abd": 0.9859943977591037, "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9510869565217391, "success_rate.epoch.env.logic": 0.9006734006734006, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.877511961722488, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8757292525676639, "success_rate.epoch.global": 0.9170610211706102, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972174657534246, "tokens_p.mean_in_band": 0.8522135416666666, "tokens_rate.above_band": 0.9605263157894737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039473684210526314 }, { "epoch": 2.857903706859821, "grad_norm": 43.520273841247246, "learning_rate": 3.356242654252503e-07, "loss": 0.2583, "step": 13415, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.867704280155642, "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.9006734006734006, "success_rate.epoch.env.math": 0.9750328515111695, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8776680471487734, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8757740210732127, "success_rate.epoch.global": 0.9171641791044776, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99737548828125, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9980506822612085, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001949317738791423 }, { "epoch": 2.8589688964635704, "grad_norm": 870.4958100045096, "learning_rate": 3.3559719965437146e-07, "loss": 0.372, "step": 13420, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8682170542635659, "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.9007569386038689, "success_rate.epoch.env.math": 0.9746281714785652, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8778625954198473, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8758091283711091, "success_rate.epoch.global": 0.9171428571428571, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970366379310345, "tokens_p.mean_in_band": 0.6142578125, "tokens_rate.above_band": 0.9775280898876404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02247191011235955 }, { "epoch": 2.86003408606732, "grad_norm": 75.06795998869764, "learning_rate": 3.355701516680781e-07, "loss": 0.2515, "step": 13425, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8682170542635659, "success_rate.epoch.env.agentgym:sciworld": 0.9725490196078431, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.9007569386038689, "success_rate.epoch.env.math": 0.9746835443037974, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8774214036201969, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8757740539188892, "success_rate.epoch.global": 0.9169975186104219, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9923913043478261, "tokens_p.mean_in_band": 0.5133579799107143, "tokens_rate.above_band": 0.9426229508196722, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05737704918032787 }, { "epoch": 2.8610992756710694, "grad_norm": 308.01910168070776, "learning_rate": 3.3554312148608277e-07, "loss": 0.2097, "step": 13430, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8687258687258688, "success_rate.epoch.env.agentgym:sciworld": 0.9727626459143969, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.9008403361344538, "success_rate.epoch.env.math": 0.974716652136007, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8775380710659898, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8758609278804751, "success_rate.epoch.global": 0.9171003717472119, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9974729498861048, "tokens_p.mean_in_band": 0.7608506944444444, "tokens_rate.above_band": 0.9898534385569335, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010146561443066516 }, { "epoch": 2.862164465274819, "grad_norm": 92.63997390591346, "learning_rate": 3.3551610912808467e-07, "loss": 0.1854, "step": 13435, "success_rate.epoch.env.abd": 0.9860335195530726, "success_rate.epoch.env.agentgym:alfworld": 0.8692307692307693, "success_rate.epoch.env.agentgym:sciworld": 0.9727626459143969, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.9009235936188077, "success_rate.epoch.env.math": 0.9747386759581882, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8774540848638379, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8759087638449554, "success_rate.epoch.global": 0.9170792079207921, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961939102564102, "tokens_p.mean_in_band": 0.37646484375, "tokens_rate.above_band": 0.9957446808510638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00425531914893617 }, { "epoch": 2.8632296548785683, "grad_norm": 311.7292311278598, "learning_rate": 3.354891146137703e-07, "loss": 0.2847, "step": 13440, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8692307692307693, "success_rate.epoch.env.agentgym:sciworld": 0.9689922480620154, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9513513513513514, "success_rate.epoch.env.logic": 0.9010067114093959, "success_rate.epoch.env.math": 0.9748045178105995, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8774928774928775, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8755866055054021, "success_rate.epoch.global": 0.9170580964153275, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987824675324676, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9914163090128756, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008583690987124463 }, { "epoch": 2.864294844482318, "grad_norm": 42.1001288752035, "learning_rate": 3.35462137962813e-07, "loss": 0.2739, "step": 13445, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8692307692307693, "success_rate.epoch.env.agentgym:sciworld": 0.9691119691119691, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.9011725293132329, "success_rate.epoch.env.math": 0.9748373101952278, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8776091081593927, "success_rate.epoch.env.webshop": 0.9772727272727273, "success_rate.epoch.env_macro_mean": 0.8756380319088521, "success_rate.epoch.global": 0.9171604938271605, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966469957081545, "tokens_p.mean_in_band": 0.83056640625, "tokens_rate.above_band": 0.9831223628691983, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016877637130801686 }, { "epoch": 2.865360034086067, "grad_norm": 70.20439648481599, "learning_rate": 3.3543517919487335e-07, "loss": 0.1487, "step": 13450, "success_rate.epoch.env.abd": 0.9860724233983287, "success_rate.epoch.env.agentgym:alfworld": 0.8664122137404581, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.9013377926421404, "success_rate.epoch.env.math": 0.9748482220294883, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.8774091627172196, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8754363523501735, "success_rate.epoch.global": 0.9170160295930949, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.861111111111111, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9985567269076305, "tokens_p.mean_in_band": 0.726318359375, "tokens_rate.above_band": 0.9920318725099602, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00796812749003984 }, { "epoch": 2.866425223689817, "grad_norm": 211.89191238439255, "learning_rate": 3.3540823832959844e-07, "loss": 0.2181, "step": 13455, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8664122137404581, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.9015025041736227, "success_rate.epoch.env.math": 0.9748591244039878, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8775639002840012, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8751641843517873, "success_rate.epoch.global": 0.9169950738916256, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936548223350253, "tokens_p.mean_in_band": 0.716796875, "tokens_rate.above_band": 0.9248826291079812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07511737089201878 }, { "epoch": 2.867490413293566, "grad_norm": 54.87609346706096, "learning_rate": 3.353813153866228e-07, "loss": 0.1973, "step": 13460, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8664122137404581, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.9016666666666666, "success_rate.epoch.env.math": 0.9748917748917749, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8774417139256458, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.875170968590194, "success_rate.epoch.global": 0.9169741697416974, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992953431372549, "tokens_p.mean_in_band": 0.672607421875, "tokens_rate.above_band": 0.864406779661017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13559322033898305 }, { "epoch": 2.868555602897316, "grad_norm": 673.8028373010998, "learning_rate": 3.353544103855676e-07, "loss": 0.1918, "step": 13465, "success_rate.epoch.env.abd": 0.9861495844875346, "success_rate.epoch.env.agentgym:alfworld": 0.8664122137404581, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.9016666666666666, "success_rate.epoch.env.math": 0.9749460043196544, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8776344762503933, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8751934223858875, "success_rate.epoch.global": 0.9170761670761671, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953703703703703, "tokens_p.mean_in_band": 0.7896205357142857, "tokens_rate.above_band": 0.9391304347826087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06086956521739131 }, { "epoch": 2.869620792501065, "grad_norm": 72.82181013389518, "learning_rate": 3.3532752334604095e-07, "loss": 0.2742, "step": 13470, "success_rate.epoch.env.abd": 0.9861878453038674, "success_rate.epoch.env.agentgym:alfworld": 0.8669201520912547, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9514824797843666, "success_rate.epoch.env.logic": 0.9012448132780083, "success_rate.epoch.env.math": 0.9749460043196544, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8774355751099937, "success_rate.epoch.env.webshop": 0.9777777777777777, "success_rate.epoch.env_macro_mean": 0.8751866446257122, "success_rate.epoch.global": 0.9169325153374233, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.8, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999751655629139, "tokens_p.mean_in_band": 0.5293817934782609, "tokens_rate.above_band": 0.9704370179948586, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02956298200514139 }, { "epoch": 2.8706859821048147, "grad_norm": 64.28836460039312, "learning_rate": 3.3530065428763797e-07, "loss": 0.2252, "step": 13475, "success_rate.epoch.env.abd": 0.9861878453038674, "success_rate.epoch.env.agentgym:alfworld": 0.8674242424242424, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9517426273458445, "success_rate.epoch.env.logic": 0.9012448132780083, "success_rate.epoch.env.math": 0.97498921949116, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8775125628140703, "success_rate.epoch.env.webshop": 0.9782608695652174, "success_rate.epoch.env_macro_mean": 0.8753109657673019, "success_rate.epoch.global": 0.9170343137254902, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979608482871125, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9975589910496339, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0024410089503661514 }, { "epoch": 2.871751171708564, "grad_norm": 35.587853025261694, "learning_rate": 3.3527380322994074e-07, "loss": 0.1936, "step": 13480, "success_rate.epoch.env.abd": 0.9862258953168044, "success_rate.epoch.env.agentgym:alfworld": 0.8674242424242424, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9518716577540107, "success_rate.epoch.env.logic": 0.9012448132780083, "success_rate.epoch.env.math": 0.9750322858372794, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8773525721455459, "success_rate.epoch.env.webshop": 0.9782608695652174, "success_rate.epoch.env_macro_mean": 0.8753155254126382, "success_rate.epoch.global": 0.9170134638922889, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969857283464567, "tokens_p.mean_in_band": 0.767578125, "tokens_rate.above_band": 0.9921875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0078125 }, { "epoch": 2.8728163613123137, "grad_norm": 129.34319327766767, "learning_rate": 3.35246970192518e-07, "loss": 0.142, "step": 13485, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8674242424242424, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.952, "success_rate.epoch.env.logic": 0.900497512437811, "success_rate.epoch.env.math": 0.9750322858372794, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8772314437832759, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752971547108049, "success_rate.epoch.global": 0.9168704156479217, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.000129132231405, "tokens_p.mean_below_band": 5.617039278149605e-09, "tokens_p.mean_in_band": 0.5811011904761905, "tokens_rate.above_band": 0.9850746268656716, "tokens_rate.below_band": 0.0006784260515603799, "tokens_rate.in_band": 0.014246947082767978 }, { "epoch": 2.873881550916063, "grad_norm": 144.89967409792152, "learning_rate": 3.3522015519492546e-07, "loss": 0.248, "step": 13490, "success_rate.epoch.env.abd": 0.9863013698630136, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.900497512437811, "success_rate.epoch.env.math": 0.9750859106529209, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8770337922403004, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8753411473221483, "success_rate.epoch.global": 0.9168498168498168, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983811936936937, "tokens_p.mean_in_band": 0.43861607142857145, "tokens_rate.above_band": 0.9921787709497206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00782122905027933 }, { "epoch": 2.8749467405198126, "grad_norm": 122.80513310926591, "learning_rate": 3.3519335825670575e-07, "loss": 0.2702, "step": 13495, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.8999172870140613, "success_rate.epoch.env.math": 0.9751499571550986, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8770337922403004, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752976245066594, "success_rate.epoch.global": 0.916829268292683, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993372756933115, "tokens_p.mean_in_band": 0.5691636029411765, "tokens_rate.above_band": 0.973015873015873, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026984126984126985 }, { "epoch": 2.8760119301235623, "grad_norm": 69.21511294194717, "learning_rate": 3.3516657939738824e-07, "loss": 0.2618, "step": 13500, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.8679245283018868, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.8993399339933993, "success_rate.epoch.env.math": 0.9751605995717345, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8766396002498439, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752102697253428, "success_rate.epoch.global": 0.9165651644336176, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969991721854304, "tokens_p.mean_in_band": 0.5604248046875, "tokens_rate.above_band": 0.949685534591195, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050314465408805034 }, { "epoch": 2.8770771197273115, "grad_norm": 228.1119840975371, "learning_rate": 3.351398186364893e-07, "loss": 0.1488, "step": 13505, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9767441860465116, "success_rate.epoch.env.ded": 0.9521276595744681, "success_rate.epoch.env.logic": 0.8988486842105263, "success_rate.epoch.env.math": 0.9752030782385634, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8766781142678739, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752181122009499, "success_rate.epoch.global": 0.9165450121654501, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983571029082774, "tokens_p.mean_in_band": 0.5720703125, "tokens_rate.above_band": 0.9781181619256017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02188183807439825 }, { "epoch": 2.8781423093310607, "grad_norm": 40.752070478562615, "learning_rate": 3.351130759935118e-07, "loss": 0.2747, "step": 13510, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.8988486842105263, "success_rate.epoch.env.math": 0.9748186086214256, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8765201122544434, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.875228389649222, "success_rate.epoch.global": 0.9164034021871202, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974880790190735, "tokens_p.mean_in_band": 0.53369140625, "tokens_rate.above_band": 0.9786666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021333333333333333 }, { "epoch": 2.8792074989348104, "grad_norm": 641.8474036380121, "learning_rate": 3.3508635148794573e-07, "loss": 0.2009, "step": 13515, "success_rate.epoch.env.abd": 0.9863387978142076, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.898360655737705, "success_rate.epoch.env.math": 0.9748400852878465, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8766739333540953, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8751999595849721, "success_rate.epoch.global": 0.9163834951456311, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930037313432836, "tokens_p.mean_in_band": 0.6909722222222222, "tokens_rate.above_band": 0.9370629370629371, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06293706293706294 }, { "epoch": 2.88027268853856, "grad_norm": 142.9246755014505, "learning_rate": 3.350596451392677e-07, "loss": 0.1623, "step": 13520, "success_rate.epoch.env.abd": 0.9864130434782609, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.898360655737705, "success_rate.epoch.env.math": 0.9748829289059174, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8768273716951789, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752245530052637, "success_rate.epoch.global": 0.9164848484848485, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9968487394957983, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9916666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008333333333333333 }, { "epoch": 2.8813378781423093, "grad_norm": 248.38681228361213, "learning_rate": 3.3503295696694103e-07, "loss": 0.3536, "step": 13525, "success_rate.epoch.env.abd": 0.986449864498645, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9522546419098143, "success_rate.epoch.env.logic": 0.8984438984438985, "success_rate.epoch.env.math": 0.9748829289059174, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8762022959975179, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8751786428260744, "success_rate.epoch.global": 0.9162227602905569, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9890350877192983, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.8837209302325582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11627906976744186 }, { "epoch": 2.8824030677460586, "grad_norm": 782.1273231612497, "learning_rate": 3.3500628699041583e-07, "loss": 0.2404, "step": 13530, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9692307692307692, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8985270049099836, "success_rate.epoch.env.math": 0.9749149659863946, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8763557483731019, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.875217872678904, "success_rate.epoch.global": 0.9163240628778718, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988484087102177, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.8834682573498083, "grad_norm": 66.71063310634881, "learning_rate": 3.3497963522912913e-07, "loss": 0.1643, "step": 13535, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9523809523809523, "success_rate.epoch.env.logic": 0.8986099754701553, "success_rate.epoch.env.math": 0.9749469214437367, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8765470297029703, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752564269625807, "success_rate.epoch.global": 0.9164251207729469, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9972209618874773, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.884533446953558, "grad_norm": 122.24913476510302, "learning_rate": 3.3495300170250446e-07, "loss": 0.1606, "step": 13540, "success_rate.epoch.env.abd": 0.9864864864864865, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9693486590038314, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8986099754701553, "success_rate.epoch.env.math": 0.9749787955894826, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8764669549104386, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752634672606107, "success_rate.epoch.global": 0.9164053075995174, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0001237623762376, "tokens_p.mean_in_band": 0.6186079545454546, "tokens_rate.above_band": 0.9892262487757101, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010773751224289911 }, { "epoch": 2.885598636557307, "grad_norm": 67.38008404636768, "learning_rate": 3.349263864299522e-07, "loss": 0.2854, "step": 13545, "success_rate.epoch.env.abd": 0.9865591397849462, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8986928104575164, "success_rate.epoch.env.math": 0.9749894022891056, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.876040703052729, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752504520768712, "success_rate.epoch.global": 0.9162650602409639, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968039772727273, "tokens_p.mean_in_band": 0.66015625, "tokens_rate.above_band": 0.9606986899563319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039301310043668124 }, { "epoch": 2.8866638261610564, "grad_norm": 159.066285551253, "learning_rate": 3.3489978943086924e-07, "loss": 0.3446, "step": 13550, "success_rate.epoch.env.abd": 0.9865591397849462, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8988580750407831, "success_rate.epoch.env.math": 0.9750528541226215, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8761171032357473, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752781899495804, "success_rate.epoch.global": 0.9163658243080626, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9859154929577465, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014084507042253521 }, { "epoch": 2.887729015764806, "grad_norm": 114.94302786061347, "learning_rate": 3.348732107246394e-07, "loss": 0.3266, "step": 13555, "success_rate.epoch.env.abd": 0.9865951742627346, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8991049633848658, "success_rate.epoch.env.math": 0.9750739332488382, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8759618344105878, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752917111425739, "success_rate.epoch.global": 0.9163461538461538, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952083333333334, "tokens_p.mean_in_band": 0.66494140625, "tokens_rate.above_band": 0.967741935483871, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03225806451612903 }, { "epoch": 2.888794205368556, "grad_norm": 164.51186914673178, "learning_rate": 3.348466503306331e-07, "loss": 0.1185, "step": 13560, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9525065963060686, "success_rate.epoch.env.logic": 0.8992688870836718, "success_rate.epoch.env.math": 0.9746728577458843, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8761143559790963, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8752905167942369, "success_rate.epoch.global": 0.9163265306122449, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9911858974358975, "tokens_p.mean_in_band": 0.7340198863636364, "tokens_rate.above_band": 0.9140625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0859375 }, { "epoch": 2.889859394972305, "grad_norm": 67.36719506297227, "learning_rate": 3.3482010826820716e-07, "loss": 0.3898, "step": 13565, "success_rate.epoch.env.abd": 0.9866666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.868421052631579, "success_rate.epoch.env.agentgym:sciworld": 0.9694656488549618, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9526315789473684, "success_rate.epoch.env.logic": 0.8992688870836718, "success_rate.epoch.env.math": 0.9747048903878583, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8763424363301626, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8753255254882677, "success_rate.epoch.global": 0.9164268585131895, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962311557788944, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.9851485148514851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01485148514851485 }, { "epoch": 2.8909245845760543, "grad_norm": 16.71514285348692, "learning_rate": 3.347935845567054e-07, "loss": 0.1771, "step": 13570, "success_rate.epoch.env.abd": 0.9867021276595744, "success_rate.epoch.env.agentgym:alfworld": 0.8689138576779026, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.8992688870836718, "success_rate.epoch.env.math": 0.9747368421052631, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8764561618638872, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8754086500382016, "success_rate.epoch.global": 0.9165269461077844, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992088607594937, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9978947368421053, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002105263157894737 }, { "epoch": 2.891989774179804, "grad_norm": 240.080774308402, "learning_rate": 3.3476707921545815e-07, "loss": 0.2157, "step": 13575, "success_rate.epoch.env.abd": 0.9867021276595744, "success_rate.epoch.env.agentgym:alfworld": 0.8689138576779026, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.952755905511811, "success_rate.epoch.env.logic": 0.898538961038961, "success_rate.epoch.env.math": 0.9747793190416141, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8760330578512396, "success_rate.epoch.env.webshop": 0.9787234042553191, "success_rate.epoch.env_macro_mean": 0.8753076906635645, "success_rate.epoch.global": 0.916267942583732, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9937106918238994, "tokens_p.mean_in_band": 0.596484375, "tokens_rate.above_band": 0.888268156424581, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11173184357541899 }, { "epoch": 2.8930549637835536, "grad_norm": 133.50116436488105, "learning_rate": 3.347405922637822e-07, "loss": 0.2272, "step": 13580, "success_rate.epoch.env.abd": 0.9867724867724867, "success_rate.epoch.env.agentgym:alfworld": 0.8689138576779026, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9528795811518325, "success_rate.epoch.env.logic": 0.898538961038961, "success_rate.epoch.env.math": 0.9747899159663865, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8762224938875306, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8753838115840504, "success_rate.epoch.global": 0.9163679808841099, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.998761655011655, "tokens_p.mean_in_band": 0.7135416666666666, "tokens_rate.above_band": 0.9930555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006944444444444444 }, { "epoch": 2.894120153387303, "grad_norm": 2.4929023943807134, "learning_rate": 3.3471412372098104e-07, "loss": 0.3044, "step": 13585, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8689138576779026, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9528795811518325, "success_rate.epoch.env.logic": 0.8987034035656402, "success_rate.epoch.env.math": 0.9748322147651006, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8763358778625954, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8754160867104838, "success_rate.epoch.global": 0.9164677804295943, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9952586206896552, "tokens_p.mean_in_band": 0.6494140625, "tokens_rate.above_band": 0.9863945578231292, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013605442176870748 }, { "epoch": 2.8951853429910526, "grad_norm": 165.6439172562267, "learning_rate": 3.3468767360634485e-07, "loss": 0.1351, "step": 13590, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8694029850746269, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9528795811518325, "success_rate.epoch.env.logic": 0.8987854251012146, "success_rate.epoch.env.math": 0.9748533109807209, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8765620237732399, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8754904858976258, "success_rate.epoch.global": 0.9165673420738975, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996905193236715, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.896250532594802, "grad_norm": 58.68080529966273, "learning_rate": 3.346612419391502e-07, "loss": 0.1506, "step": 13595, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8698884758364313, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.9530026109660574, "success_rate.epoch.env.logic": 0.8988673139158576, "success_rate.epoch.env.math": 0.9748848890749268, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8767123287671232, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.875569785213877, "success_rate.epoch.global": 0.9166666666666666, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980978260869565, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.8973157221985515, "grad_norm": 66.77644335245357, "learning_rate": 3.3463482873866034e-07, "loss": 0.1027, "step": 13600, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8703703703703703, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8988673139158576, "success_rate.epoch.env.math": 0.9749058971141782, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.8769371011850501, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8756470637616096, "success_rate.epoch.global": 0.9167657550535078, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977014010507881, "tokens_p.mean_in_band": 0.751953125, "tokens_rate.above_band": 0.9896013864818024, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010398613518197574 }, { "epoch": 2.8983809118023007, "grad_norm": 0.0, "learning_rate": 3.346084340241251e-07, "loss": 0.1737, "step": 13605, "success_rate.epoch.env.abd": 0.9868073878627969, "success_rate.epoch.env.agentgym:alfworld": 0.8708487084870848, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9772727272727273, "success_rate.epoch.env.ded": 0.953125, "success_rate.epoch.env.logic": 0.8988673139158576, "success_rate.epoch.env.math": 0.9749582637729549, "success_rate.epoch.env.sat": 0.16326530612244897, "success_rate.epoch.env.science": 0.876783004552352, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8756813008654999, "success_rate.epoch.global": 0.9167458432304038, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981617647058824, "tokens_p.mean_in_band": 0.46707589285714285, "tokens_rate.above_band": 0.9807692307692307, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019230769230769232 }, { "epoch": 2.8994461014060504, "grad_norm": 376.0435289587504, "learning_rate": 3.3458205781478055e-07, "loss": 0.2483, "step": 13610, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8708487084870848, "success_rate.epoch.env.agentgym:sciworld": 0.9695817490494296, "success_rate.epoch.env.agentgym:textcraft": 0.9818181818181818, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9506493506493506, "success_rate.epoch.env.logic": 0.898949070331447, "success_rate.epoch.env.math": 0.9749791492910759, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.87689508793208, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8752279861151138, "success_rate.epoch.global": 0.9166073546856465, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9905625, "tokens_p.mean_in_band": 0.5939903846153847, "tokens_rate.above_band": 0.8368200836820083, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16317991631799164 }, { "epoch": 2.9005112910097997, "grad_norm": 72.80282515141374, "learning_rate": 3.345557001298497e-07, "loss": 0.2528, "step": 13615, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9506493506493506, "success_rate.epoch.env.logic": 0.898949070331447, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.8764759309718437, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.874940707938509, "success_rate.epoch.global": 0.9163507109004739, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976624015748031, "tokens_p.mean_in_band": 0.5286458333333334, "tokens_rate.above_band": 0.9548872180451128, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045112781954887216 }, { "epoch": 2.9015764806135493, "grad_norm": 719.3361544846681, "learning_rate": 3.3452936098854174e-07, "loss": 0.2242, "step": 13620, "success_rate.epoch.env.abd": 0.9868421052631579, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9699248120300752, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9506493506493506, "success_rate.epoch.env.logic": 0.8991121872477804, "success_rate.epoch.env.math": 0.9750208159866778, "success_rate.epoch.env.sat": 0.16, "success_rate.epoch.env.science": 0.8766253401874811, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8749917247068503, "success_rate.epoch.global": 0.9164497041420119, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976635514018691, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9953488372093023, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004651162790697674 }, { "epoch": 2.9026416702172986, "grad_norm": 481.86247014032307, "learning_rate": 3.345030404100524e-07, "loss": 0.183, "step": 13625, "success_rate.epoch.env.abd": 0.9868766404199475, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9700374531835206, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9506493506493506, "success_rate.epoch.env.logic": 0.8993558776167472, "success_rate.epoch.env.math": 0.9750415973377704, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8766999093381687, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8747507212776704, "success_rate.epoch.global": 0.9164302600472813, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966216216216216, "tokens_p.mean_in_band": 0.6927083333333334, "tokens_rate.above_band": 0.9390862944162437, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06091370558375635 }, { "epoch": 2.9037068598210483, "grad_norm": 80.50282392334165, "learning_rate": 3.34476738413564e-07, "loss": 0.1404, "step": 13630, "success_rate.epoch.env.abd": 0.9868766404199475, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9700374531835206, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9483204134366925, "success_rate.epoch.env.logic": 0.8994368463395012, "success_rate.epoch.env.math": 0.9750727046115496, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8768487775430124, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8745627219130084, "success_rate.epoch.global": 0.9164108618654073, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957186873029521, "tokens_p.mean_below_band": 1.257285475730896e-07, "tokens_p.mean_in_band": 0.5504064498933902, "tokens_rate.above_band": 0.8810606060606061, "tokens_rate.below_band": 0.000505050505050505, "tokens_rate.in_band": 0.11843434343434343 }, { "epoch": 2.9047720494247975, "grad_norm": 129.60142394429622, "learning_rate": 3.344504550182453e-07, "loss": 0.2027, "step": 13635, "success_rate.epoch.env.abd": 0.9869451697127938, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9483204134366925, "success_rate.epoch.env.logic": 0.8996789727126806, "success_rate.epoch.env.math": 0.975093399750934, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8766214177978884, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8745823393319274, "success_rate.epoch.global": 0.9163915094339623, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971590909090909, "tokens_p.mean_in_band": 0.7135416666666666, "tokens_rate.above_band": 0.9794520547945206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02054794520547945 }, { "epoch": 2.905837239028547, "grad_norm": 0.0, "learning_rate": 3.344241902432513e-07, "loss": 0.2155, "step": 13640, "success_rate.epoch.env.abd": 0.9869451697127938, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9485861182519281, "success_rate.epoch.env.logic": 0.8996789727126806, "success_rate.epoch.env.math": 0.9751449875724938, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8767329716696806, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8746213253781626, "success_rate.epoch.global": 0.916489988221437, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979437108325873, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.9991055456171736, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0008944543828264759 }, { "epoch": 2.9069024286322964, "grad_norm": 131.81684788258252, "learning_rate": 3.343979441077237e-07, "loss": 0.1793, "step": 13645, "success_rate.epoch.env.abd": 0.9869791666666666, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9485861182519281, "success_rate.epoch.env.logic": 0.8997594226142742, "success_rate.epoch.env.math": 0.9751963621331129, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8765432098765432, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8746191489802488, "success_rate.epoch.global": 0.9164705882352941, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9913877952755905, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9548872180451128, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045112781954887216 }, { "epoch": 2.907967618236046, "grad_norm": 26.348733399631833, "learning_rate": 3.343717166307904e-07, "loss": 0.211, "step": 13650, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8676470588235294, "success_rate.epoch.env.agentgym:sciworld": 0.9701492537313433, "success_rate.epoch.env.agentgym:textcraft": 0.9821428571428571, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9485861182519281, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9752168525402726, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8766175142943123, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8746588134259908, "success_rate.epoch.global": 0.9165687426556992, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960056390977443, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.9090328078397953, "grad_norm": 38.51936733672637, "learning_rate": 3.343455078315659e-07, "loss": 0.1117, "step": 13655, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8681318681318682, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.9487179487179487, "success_rate.epoch.env.logic": 0.9, "success_rate.epoch.env.math": 0.9752270850536746, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8768028846153846, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8747712220745215, "success_rate.epoch.global": 0.9166666666666666, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975183823529412, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9985315712187959, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0014684287812041115 }, { "epoch": 2.910097997443545, "grad_norm": 103.53290918560029, "learning_rate": 3.343193177291509e-07, "loss": 0.2268, "step": 13660, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9777777777777777, "success_rate.epoch.env.ded": 0.948849104859335, "success_rate.epoch.env.logic": 0.9000799360511591, "success_rate.epoch.env.math": 0.9752475247524752, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8766876687668766, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748255481288034, "success_rate.epoch.global": 0.9166471277842907, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986959761549925, "tokens_p.mean_in_band": 0.5340711805555556, "tokens_rate.above_band": 0.9738751814223512, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026124818577648767 }, { "epoch": 2.9111631870472943, "grad_norm": 85.17111379979617, "learning_rate": 3.342931463426326e-07, "loss": 0.1473, "step": 13665, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.948849104859335, "success_rate.epoch.env.logic": 0.9001597444089456, "success_rate.epoch.env.math": 0.9752883031301482, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8765358106083309, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748666227073811, "success_rate.epoch.global": 0.9166276346604215, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9935546875, "tokens_p.mean_in_band": 0.6376953125, "tokens_rate.above_band": 0.975609756097561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024390243902439025 }, { "epoch": 2.912228376651044, "grad_norm": 28.24366223984647, "learning_rate": 3.3426699369108453e-07, "loss": 0.2943, "step": 13670, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.9002394253790902, "success_rate.epoch.env.math": 0.9753187988482106, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8764213046080191, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748780915860442, "success_rate.epoch.global": 0.9166081871345029, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970982142857143, "tokens_p.mean_in_band": 0.67578125, "tokens_rate.above_band": 0.963302752293578, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03669724770642202 }, { "epoch": 2.913293566254793, "grad_norm": 48.55987565337608, "learning_rate": 3.3424085979356653e-07, "loss": 0.2038, "step": 13675, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9702602230483272, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.9003189792663477, "success_rate.epoch.env.math": 0.9753593429158111, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8763071407230355, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748786310469417, "success_rate.epoch.global": 0.916588785046729, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9857954545454546, "tokens_p.mean_in_band": 0.69921875, "tokens_rate.above_band": 0.952755905511811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047244094488188976 }, { "epoch": 2.914358755858543, "grad_norm": 91.68625530196564, "learning_rate": 3.342147446691248e-07, "loss": 0.2306, "step": 13680, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8686131386861314, "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.9004777070063694, "success_rate.epoch.env.math": 0.9753896636587367, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8758579528498955, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.874864995404383, "success_rate.epoch.global": 0.9164527421236873, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952362804878049, "tokens_p.mean_in_band": 0.726318359375, "tokens_rate.above_band": 0.9111111111111111, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08888888888888889 }, { "epoch": 2.915423945462292, "grad_norm": 311.17938717189907, "learning_rate": 3.3418864833679186e-07, "loss": 0.3119, "step": 13685, "success_rate.epoch.env.abd": 0.9870801033591732, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.9007148530579825, "success_rate.epoch.env.math": 0.9754299754299754, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8759320011929616, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8749403841835369, "success_rate.epoch.global": 0.9165501165501165, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970386533665836, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9975124378109452, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0024875621890547263 }, { "epoch": 2.916489135066042, "grad_norm": 38.684225393630584, "learning_rate": 3.341625708155865e-07, "loss": 0.1845, "step": 13690, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9703703703703703, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.9007936507936508, "success_rate.epoch.env.math": 0.9754601226993865, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8761167361524718, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8749701095169253, "success_rate.epoch.global": 0.9166472642607684, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949596774193549, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.9789473684210527, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021052631578947368 }, { "epoch": 2.9175543246697915, "grad_norm": 60.52852500756108, "learning_rate": 3.341365121245139e-07, "loss": 0.2775, "step": 13695, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9782608695652174, "success_rate.epoch.env.ded": 0.9489795918367347, "success_rate.epoch.env.logic": 0.9007936507936508, "success_rate.epoch.env.math": 0.9755102040816327, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8762273132996132, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8750045207399375, "success_rate.epoch.global": 0.9167441860465116, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969512195121951, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9951456310679612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0048543689320388345 }, { "epoch": 2.9186195142735407, "grad_norm": 105.9988224563135, "learning_rate": 3.341104722825654e-07, "loss": 0.4084, "step": 13700, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9465648854961832, "success_rate.epoch.env.logic": 0.9002375296912114, "success_rate.epoch.env.math": 0.9755102040816327, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8761140819964349, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8747662003712043, "success_rate.epoch.global": 0.9164924506387921, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9938056792873051, "tokens_p.mean_in_band": 0.7248507724719101, "tokens_rate.above_band": 0.9098277608915907, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09017223910840932 }, { "epoch": 2.91968470387729, "grad_norm": 191.71952869911075, "learning_rate": 3.340844513087186e-07, "loss": 0.2535, "step": 13705, "success_rate.epoch.env.abd": 0.9871134020618557, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9467005076142132, "success_rate.epoch.env.logic": 0.9004739336492891, "success_rate.epoch.env.math": 0.9755301794453507, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8762611275964391, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748152046475525, "success_rate.epoch.global": 0.9165893271461717, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976946721311475, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.9207498934810396, "grad_norm": 49.45679574806478, "learning_rate": 3.3405844922193746e-07, "loss": 0.2592, "step": 13710, "success_rate.epoch.env.abd": 0.9871794871794872, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9705882352941176, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9467005076142132, "success_rate.epoch.env.logic": 0.9006309148264984, "success_rate.epoch.env.math": 0.9755501222493888, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8761114404267931, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748236884593008, "success_rate.epoch.global": 0.9165701042873696, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968171296296297, "tokens_p.mean_in_band": 0.6241319444444444, "tokens_rate.above_band": 0.96, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04 }, { "epoch": 2.9218150830847893, "grad_norm": 37.68974011765153, "learning_rate": 3.3403246604117213e-07, "loss": 0.098, "step": 13715, "success_rate.epoch.env.abd": 0.9872122762148338, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9708029197080292, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9467005076142132, "success_rate.epoch.env.logic": 0.9007874015748032, "success_rate.epoch.env.math": 0.9755700325732899, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8762214983713354, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.874872227410756, "success_rate.epoch.global": 0.9166666666666666, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99933638996139, "tokens_p.mean_in_band": 0.716796875, "tokens_rate.above_band": 0.9923371647509579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007662835249042145 }, { "epoch": 2.9228802726885386, "grad_norm": 1063.2302774167858, "learning_rate": 3.3400650178535897e-07, "loss": 0.2805, "step": 13720, "success_rate.epoch.env.abd": 0.9872122762148338, "success_rate.epoch.env.agentgym:alfworld": 0.8690909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.9708029197080292, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9467005076142132, "success_rate.epoch.env.logic": 0.9008654602675059, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8761088113542282, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748726906107437, "success_rate.epoch.global": 0.9166473988439306, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925, "tokens_p.mean_in_band": 0.6784855769230769, "tokens_rate.above_band": 0.8849557522123894, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11504424778761062 }, { "epoch": 2.923945462292288, "grad_norm": 51.36229935927613, "learning_rate": 3.339805564734205e-07, "loss": 0.2539, "step": 13725, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8695652173913043, "success_rate.epoch.env.agentgym:sciworld": 0.9708029197080292, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9468354430379747, "success_rate.epoch.env.logic": 0.9008654602675059, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.1568627450980392, "success_rate.epoch.env.science": 0.8763282172373081, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8749539385272865, "success_rate.epoch.global": 0.916743648960739, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970525568181818, "tokens_p.mean_in_band": 0.6328125, "tokens_rate.above_band": 0.9977324263038548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0022675736961451248 }, { "epoch": 2.9250106518960375, "grad_norm": 99.77297555224303, "learning_rate": 3.3395463012426546e-07, "loss": 0.2581, "step": 13730, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8700361010830325, "success_rate.epoch.env.agentgym:sciworld": 0.9708029197080292, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9468354430379747, "success_rate.epoch.env.logic": 0.9009433962264151, "success_rate.epoch.env.math": 0.9756295694557271, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8759210138520483, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8746943783792556, "success_rate.epoch.global": 0.9164936562860438, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.996195652173913, "tokens_p.mean_in_band": 0.5609756097560976, "tokens_rate.above_band": 0.8937823834196891, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10621761658031088 }, { "epoch": 2.926075841499787, "grad_norm": 167.65796785631733, "learning_rate": 3.3392872275678884e-07, "loss": 0.2252, "step": 13735, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8700361010830325, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9787234042553191, "success_rate.epoch.env.ded": 0.9468354430379747, "success_rate.epoch.env.logic": 0.9010989010989011, "success_rate.epoch.env.math": 0.9756394640682094, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8758458370108856, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8747122323651525, "success_rate.epoch.global": 0.9164746543778802, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961222627737226, "tokens_p.mean_in_band": 0.45390625, "tokens_rate.above_band": 0.9647887323943662, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035211267605633804 }, { "epoch": 2.9271410311035364, "grad_norm": 65.92616260040826, "learning_rate": 3.3390283438987174e-07, "loss": 0.1537, "step": 13740, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8709677419354839, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.946969696969697, "success_rate.epoch.env.logic": 0.9012539184952978, "success_rate.epoch.env.math": 0.9756493506493507, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8759553203997649, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.874874372779874, "success_rate.epoch.global": 0.9165707710011507, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9995591692789969, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.9984350547730829, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001564945226917058 }, { "epoch": 2.9282062207072856, "grad_norm": 50.79706360580214, "learning_rate": 3.338769650423813e-07, "loss": 0.3547, "step": 13745, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8714285714285714, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.946969696969697, "success_rate.epoch.env.logic": 0.9013312451057165, "success_rate.epoch.env.math": 0.9756986634264885, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.876064610866373, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8749377145387153, "success_rate.epoch.global": 0.9166666666666666, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973312672176309, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.9972527472527473, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0027472527472527475 }, { "epoch": 2.9292714103110353, "grad_norm": 52.56713490968818, "learning_rate": 3.3385111473317113e-07, "loss": 0.2486, "step": 13750, "success_rate.epoch.env.abd": 0.9872773536895675, "success_rate.epoch.env.agentgym:alfworld": 0.8718861209964412, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.947103274559194, "success_rate.epoch.env.logic": 0.9014084507042254, "success_rate.epoch.env.math": 0.9757281553398058, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8762100322675271, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8750143732723831, "success_rate.epoch.global": 0.9167623421354765, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9985214194373402, "tokens_p.mean_in_band": 0.763671875, "tokens_rate.above_band": 0.9974489795918368, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002551020408163265 }, { "epoch": 2.930336599914785, "grad_norm": 84.65153791720067, "learning_rate": 3.3382528348108063e-07, "loss": 0.2564, "step": 13755, "success_rate.epoch.env.abd": 0.9873096446700508, "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.947103274559194, "success_rate.epoch.env.logic": 0.9015625, "success_rate.epoch.env.math": 0.9757575757575757, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8760257913247362, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8750585391166296, "success_rate.epoch.global": 0.916743119266055, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976190476190476, "tokens_p.mean_in_band": 0.6766493055555556, "tokens_rate.above_band": 0.9790209790209791, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02097902097902098 }, { "epoch": 2.9314017895185343, "grad_norm": 180.65384693970705, "learning_rate": 3.337994713049354e-07, "loss": 0.1197, "step": 13760, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, "success_rate.epoch.env.agentgym:sciworld": 0.9709090909090909, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9472361809045227, "success_rate.epoch.env.logic": 0.9017160686427457, "success_rate.epoch.env.math": 0.9757771497779572, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8761709601873536, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8751024796032588, "success_rate.epoch.global": 0.9168384879725086, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990808823529411, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.9920424403183024, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007957559681697613 }, { "epoch": 2.9324669791222835, "grad_norm": 213.7760159704575, "learning_rate": 3.337736782235472e-07, "loss": 0.1954, "step": 13765, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8723404255319149, "success_rate.epoch.env.agentgym:sciworld": 0.967391304347826, "success_rate.epoch.env.agentgym:textcraft": 0.9824561403508771, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9472361809045227, "success_rate.epoch.env.logic": 0.9018691588785047, "success_rate.epoch.env.math": 0.9758064516129032, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8762796139221994, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748091395345575, "success_rate.epoch.global": 0.9168097036274173, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9968525179856115, "tokens_p.mean_in_band": 0.435546875, "tokens_rate.above_band": 0.9928571428571429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007142857142857143 }, { "epoch": 2.933532168726033, "grad_norm": 179.1763456015147, "learning_rate": 3.337479042557138e-07, "loss": 0.1132, "step": 13770, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9472361809045227, "success_rate.epoch.env.logic": 0.9019455252918288, "success_rate.epoch.env.math": 0.9754131398629585, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8764602803738317, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748759591963245, "success_rate.epoch.global": 0.9167904903417533, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99875, "tokens_p.mean_in_band": 0.6302083333333334, "tokens_rate.above_band": 0.9749303621169917, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025069637883008356 }, { "epoch": 2.934597358329783, "grad_norm": 44.670808139520055, "learning_rate": 3.337221494202192e-07, "loss": 0.1466, "step": 13775, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9472361809045227, "success_rate.epoch.env.logic": 0.9021739130434783, "success_rate.epoch.env.math": 0.9754428341384863, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8763127187864644, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8748860065090345, "success_rate.epoch.global": 0.9167713209270465, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992445054945055, "tokens_p.mean_in_band": 0.5966796875, "tokens_rate.above_band": 0.9191919191919192, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08080808080808081 }, { "epoch": 2.935662547933532, "grad_norm": 0.0, "learning_rate": 3.3369641373583323e-07, "loss": 0.3327, "step": 13780, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9024012393493416, "success_rate.epoch.env.math": 0.9754823151125402, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8763848396501458, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8749288399900987, "success_rate.epoch.global": 0.9168662333219295, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986489661654135, "tokens_p.mean_in_band": 0.8645833333333334, "tokens_rate.above_band": 0.994392523364486, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005607476635514018 }, { "epoch": 2.936727737537282, "grad_norm": 73.69747390381193, "learning_rate": 3.3367069722131185e-07, "loss": 0.1716, "step": 13785, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9791666666666666, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9025522041763341, "success_rate.epoch.env.math": 0.9755020080321285, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8763096623981373, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8749375200350599, "success_rate.epoch.global": 0.9168470213008315, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915081521739131, "tokens_p.mean_in_band": 0.51171875, "tokens_rate.above_band": 0.9787234042553191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02127659574468085 }, { "epoch": 2.937792927141031, "grad_norm": 116.86833715647312, "learning_rate": 3.3364499989539725e-07, "loss": 0.2378, "step": 13790, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.872791519434629, "success_rate.epoch.env.agentgym:sciworld": 0.9675090252707581, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9026275115919629, "success_rate.epoch.env.math": 0.9755314881668672, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8764893926184248, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8750020371113038, "success_rate.epoch.global": 0.9169416315849357, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9958333333333333, "tokens_p.mean_in_band": 0.4609375, "tokens_rate.above_band": 0.9905660377358491, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009433962264150943 }, { "epoch": 2.9388581167447807, "grad_norm": 89.84531203150478, "learning_rate": 3.336193217768172e-07, "loss": 0.1752, "step": 13795, "success_rate.epoch.env.abd": 0.9873417721518988, "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9026275115919629, "success_rate.epoch.env.math": 0.9755804643714971, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8765969802555168, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8750676148419063, "success_rate.epoch.global": 0.9170360268212296, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9964398734177216, "tokens_p.mean_in_band": 0.703125, "tokens_rate.above_band": 0.9974747474747475, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0025252525252525255 }, { "epoch": 2.93992330634853, "grad_norm": 581.6091059065415, "learning_rate": 3.3359366288428594e-07, "loss": 0.1664, "step": 13800, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9475, "success_rate.epoch.env.logic": 0.9028527370855821, "success_rate.epoch.env.math": 0.9755804643714971, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8764501160092807, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8750924975332304, "success_rate.epoch.global": 0.9170166874787149, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978813559322034, "tokens_p.mean_in_band": 0.6555989583333334, "tokens_rate.above_band": 0.9874476987447699, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012552301255230125 }, { "epoch": 2.9409884959522796, "grad_norm": 361.8161057373051, "learning_rate": 3.335680232365034e-07, "loss": 0.1848, "step": 13805, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9476309226932669, "success_rate.epoch.env.logic": 0.9023827824750192, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.876231884057971, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8750445000657265, "success_rate.epoch.global": 0.9168840004535662, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980908360128617, "tokens_p.mean_in_band": 0.6748621323529411, "tokens_rate.above_band": 0.97339593114241, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026604068857589983 }, { "epoch": 2.942053685556029, "grad_norm": 23.612774643333683, "learning_rate": 3.335424028521556e-07, "loss": 0.1494, "step": 13810, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8732394366197183, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9476309226932669, "success_rate.epoch.env.logic": 0.9026073619631901, "success_rate.epoch.env.math": 0.9756389776357828, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8763752171395484, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8750806031664511, "success_rate.epoch.global": 0.9169781402197305, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9912050898203593, "tokens_p.mean_in_band": 0.7924107142857143, "tokens_rate.above_band": 0.9226519337016574, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07734806629834254 }, { "epoch": 2.9431188751597785, "grad_norm": 253.6609624716674, "learning_rate": 3.3351680174991445e-07, "loss": 0.1918, "step": 13815, "success_rate.epoch.env.abd": 0.9874055415617129, "success_rate.epoch.env.agentgym:alfworld": 0.8736842105263158, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9478908188585607, "success_rate.epoch.env.logic": 0.9026819923371647, "success_rate.epoch.env.math": 0.9756778309409888, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8764467592592593, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8751614846092495, "success_rate.epoch.global": 0.9170720669759023, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999875745526839, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.944184064763528, "grad_norm": 118.37897388650002, "learning_rate": 3.3349121994843786e-07, "loss": 0.3028, "step": 13820, "success_rate.epoch.env.abd": 0.9874371859296482, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9478908188585607, "success_rate.epoch.env.logic": 0.9029051987767585, "success_rate.epoch.env.math": 0.9757068896853843, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.876229034123771, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8752076526107037, "success_rate.epoch.global": 0.9170527743247825, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962837837837838, "tokens_p.mean_in_band": 0.486083984375, "tokens_rate.above_band": 0.9585492227979274, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04145077720207254 }, { "epoch": 2.9452492543672775, "grad_norm": 81.81235166389844, "learning_rate": 3.3346565746636967e-07, "loss": 0.1684, "step": 13825, "success_rate.epoch.env.abd": 0.9874371859296482, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.9676258992805755, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9478908188585607, "success_rate.epoch.env.logic": 0.9029051987767585, "success_rate.epoch.env.math": 0.9757358790771679, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8764790764790765, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8752330191331663, "success_rate.epoch.global": 0.9171464047860932, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9902522935779816, "tokens_p.mean_in_band": 0.880859375, "tokens_rate.above_band": 0.9819819819819819, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018018018018018018 }, { "epoch": 2.9463144439710267, "grad_norm": 61.626123211632304, "learning_rate": 3.3344011432233965e-07, "loss": 0.255, "step": 13830, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948019801980198, "success_rate.epoch.env.logic": 0.9029793735676088, "success_rate.epoch.env.math": 0.9757647993643226, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8762975778546713, "success_rate.epoch.env.webshop": 0.9791666666666666, "success_rate.epoch.env_macro_mean": 0.8752510283629921, "success_rate.epoch.global": 0.9171270718232044, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9444444444444443, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0001511487303507, "tokens_p.mean_in_band": 0.5615234375, "tokens_rate.above_band": 0.9975874547647768, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0024125452352231603 }, { "epoch": 2.9473796335747764, "grad_norm": 70.16975958078042, "learning_rate": 3.3341459053496345e-07, "loss": 0.2641, "step": 13835, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9481481481481482, "success_rate.epoch.env.logic": 0.9031273836765827, "success_rate.epoch.env.math": 0.9758032526775089, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8763688760806917, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8753247808979154, "success_rate.epoch.global": 0.9172204077035703, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999366224648986, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9968895800933126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003110419906687403 }, { "epoch": 2.9484448231785256, "grad_norm": 136.18511815266748, "learning_rate": 3.3338908612284266e-07, "loss": 0.3768, "step": 13840, "success_rate.epoch.env.abd": 0.9874686716791979, "success_rate.epoch.env.agentgym:alfworld": 0.8741258741258742, "success_rate.epoch.env.agentgym:sciworld": 0.967741935483871, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9484029484029484, "success_rate.epoch.env.logic": 0.9025875190258752, "success_rate.epoch.env.math": 0.9758224336107808, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8761877339475957, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8752841422073945, "success_rate.epoch.global": 0.9170885363932951, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9996675531914894, "tokens_p.mean_in_band": 0.6350740131578947, "tokens_rate.above_band": 0.983435047951177, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016564952048823016 }, { "epoch": 2.9495100127822753, "grad_norm": 199.43719178929103, "learning_rate": 3.3336360110456463e-07, "loss": 0.1873, "step": 13845, "success_rate.epoch.env.abd": 0.9875311720698254, "success_rate.epoch.env.agentgym:alfworld": 0.8745644599303136, "success_rate.epoch.env.agentgym:sciworld": 0.9678571428571429, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9484029484029484, "success_rate.epoch.env.logic": 0.9027355623100304, "success_rate.epoch.env.math": 0.9758320126782885, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8762945914844649, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8753642125216554, "success_rate.epoch.global": 0.9171817058096415, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9992579155672823, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.9973684210526316, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002631578947368421 }, { "epoch": 2.9505752023860246, "grad_norm": 172.9738618836053, "learning_rate": 3.3333813549870267e-07, "loss": 0.1522, "step": 13850, "success_rate.epoch.env.abd": 0.9875311720698254, "success_rate.epoch.env.agentgym:alfworld": 0.8745644599303136, "success_rate.epoch.env.agentgym:sciworld": 0.9679715302491103, "success_rate.epoch.env.agentgym:textcraft": 0.9827586206896551, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9484029484029484, "success_rate.epoch.env.logic": 0.9028094153378892, "success_rate.epoch.env.math": 0.9758797943851325, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8764012647312446, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8753953666465144, "success_rate.epoch.global": 0.9172746660680211, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997362012987013, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.9516403919897742, "grad_norm": 125.43237331738564, "learning_rate": 3.33312689323816e-07, "loss": 0.1716, "step": 13855, "success_rate.epoch.env.abd": 0.9875311720698254, "success_rate.epoch.env.agentgym:alfworld": 0.875, "success_rate.epoch.env.agentgym:sciworld": 0.9679715302491103, "success_rate.epoch.env.agentgym:textcraft": 0.9830508474576272, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9485294117647058, "success_rate.epoch.env.logic": 0.9028094153378892, "success_rate.epoch.env.math": 0.975898854207823, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8765432098765432, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8754876607526423, "success_rate.epoch.global": 0.9173581520520296, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9980612998522895, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.9527055815935235, "grad_norm": 52.94560375595468, "learning_rate": 3.332872625984496e-07, "loss": 0.1934, "step": 13860, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, "success_rate.epoch.env.agentgym:sciworld": 0.9680851063829787, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948780487804878, "success_rate.epoch.env.logic": 0.9028094153378892, "success_rate.epoch.env.math": 0.9759178839320963, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8765786452353617, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.875618421672007, "success_rate.epoch.global": 0.9174507168458781, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987322515212982, "tokens_p.mean_in_band": 0.8606770833333334, "tokens_rate.above_band": 0.9979757085020243, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0020242914979757085 }, { "epoch": 2.953770771197273, "grad_norm": 189.57117936015476, "learning_rate": 3.332618553411342e-07, "loss": 0.3331, "step": 13865, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, "success_rate.epoch.env.agentgym:sciworld": 0.9683098591549296, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.9028831562974203, "success_rate.epoch.env.math": 0.9759368836291913, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8764334862385321, "success_rate.epoch.env.webshop": 0.9795918367346939, "success_rate.epoch.env_macro_mean": 0.8756454176825451, "success_rate.epoch.global": 0.9174311926605505, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.95, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0002334578441836, "tokens_p.mean_in_band": 0.6434151785714286, "tokens_rate.above_band": 0.9852786540483701, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014721345951629864 }, { "epoch": 2.954835960801023, "grad_norm": 80.79133231660975, "learning_rate": 3.3323646757038646e-07, "loss": 0.1763, "step": 13870, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8754325259515571, "success_rate.epoch.env.agentgym:sciworld": 0.9685314685314685, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.9029567854435178, "success_rate.epoch.env.math": 0.9759747932256794, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8765042979942693, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8757192470589241, "success_rate.epoch.global": 0.9175234689316049, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9990808823529411, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.955901150404772, "grad_norm": 141.87967115918724, "learning_rate": 3.332110993047089e-07, "loss": 0.1314, "step": 13875, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8724137931034482, "success_rate.epoch.env.agentgym:sciworld": 0.9686411149825784, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.903177004538578, "success_rate.epoch.env.math": 0.9760125835627212, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.876539673446004, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8754814560122729, "success_rate.epoch.global": 0.9175039071221255, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971649484536083, "tokens_p.mean_in_band": 0.466796875, "tokens_rate.above_band": 0.9979423868312757, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00205761316872428 }, { "epoch": 2.9569663400085213, "grad_norm": 0.0, "learning_rate": 3.331857505625896e-07, "loss": 0.1862, "step": 13880, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.8724137931034482, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.9025679758308157, "success_rate.epoch.env.math": 0.9760408483896308, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8766809728183119, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8754514033312618, "success_rate.epoch.global": 0.9174843889384479, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999406067251462, "tokens_p.mean_in_band": 0.44047619047619047, "tokens_rate.above_band": 0.9702127659574468, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029787234042553193 }, { "epoch": 2.958031529612271, "grad_norm": 0.0, "learning_rate": 3.331604213625026e-07, "loss": 0.2228, "step": 13885, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.9027882441597589, "success_rate.epoch.env.math": 0.9760690466849745, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8767867352773013, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8755234642091531, "success_rate.epoch.global": 0.9175762976163956, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9993640988372093, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.9590967192160207, "grad_norm": 387.4544287097135, "learning_rate": 3.331351117229077e-07, "loss": 0.3024, "step": 13890, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.9028614457831325, "success_rate.epoch.env.math": 0.9760971786833855, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8761415525114156, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8754740233778712, "success_rate.epoch.global": 0.9173342234089897, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9846938775510204, "tokens_p.mean_in_band": 0.491455078125, "tokens_rate.above_band": 0.8596491228070176, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14035087719298245 }, { "epoch": 2.96016190881977, "grad_norm": 71.45807436402674, "learning_rate": 3.331098216622503e-07, "loss": 0.1819, "step": 13895, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.96875, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.948905109489051, "success_rate.epoch.env.logic": 0.9029345372460497, "success_rate.epoch.env.math": 0.9761252446183953, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8760683760683761, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8754765671010428, "success_rate.epoch.global": 0.917314958879751, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921875, "tokens_p.mean_in_band": 0.640625, "tokens_rate.above_band": 0.9629629629629629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037037037037037035 }, { "epoch": 2.961227098423519, "grad_norm": 135.40746736484348, "learning_rate": 3.3308455119896164e-07, "loss": 0.2931, "step": 13900, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9490291262135923, "success_rate.epoch.env.logic": 0.9030075187969925, "success_rate.epoch.env.math": 0.9761439186546734, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8759601706970128, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8754961669581598, "success_rate.epoch.global": 0.9172957371225577, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.96, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986906424581006, "tokens_p.mean_in_band": 0.7060546875, "tokens_rate.above_band": 0.9675675675675676, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032432432432432434 }, { "epoch": 2.962292288027269, "grad_norm": 38.92787264919413, "learning_rate": 3.3305930035145863e-07, "loss": 0.1717, "step": 13905, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9490291262135923, "success_rate.epoch.env.logic": 0.9031531531531531, "success_rate.epoch.env.math": 0.976171875, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8761363636363636, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8755279654709632, "success_rate.epoch.global": 0.917387447327567, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9936079545454546, "tokens_p.mean_in_band": 0.732421875, "tokens_rate.above_band": 0.9777777777777777, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022222222222222223 }, { "epoch": 2.9633574776310185, "grad_norm": 54.696034723488836, "learning_rate": 3.330340691381439e-07, "loss": 0.2109, "step": 13910, "success_rate.epoch.env.abd": 0.9875621890547264, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9491525423728814, "success_rate.epoch.env.logic": 0.9032258064516129, "success_rate.epoch.env.math": 0.9762090483619345, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8759931895573212, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8755361535382943, "success_rate.epoch.global": 0.9173681878599912, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998857973421927, "tokens_p.mean_in_band": 0.3356119791666667, "tokens_rate.above_band": 0.9901315789473685, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009868421052631578 }, { "epoch": 2.9644226672347678, "grad_norm": 165.36439275421932, "learning_rate": 3.330088575774058e-07, "loss": 0.244, "step": 13915, "success_rate.epoch.env.abd": 0.9875930521091811, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9491525423728814, "success_rate.epoch.env.logic": 0.9032983508245878, "success_rate.epoch.env.math": 0.9762461059190031, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8758503401360545, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8755359367713154, "success_rate.epoch.global": 0.9173489710112857, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983504398826979, "tokens_p.mean_in_band": 0.4367897727272727, "tokens_rate.above_band": 0.96875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03125 }, { "epoch": 2.965487856838517, "grad_norm": 49.75839838282585, "learning_rate": 3.3298366568761834e-07, "loss": 0.17, "step": 13920, "success_rate.epoch.env.abd": 0.9875930521091811, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9688581314878892, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.9034431137724551, "success_rate.epoch.env.math": 0.9762645914396887, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.876026040192471, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.875577915723218, "success_rate.epoch.global": 0.9174403183023873, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955956375838926, "tokens_p.mean_in_band": 0.763671875, "tokens_rate.above_band": 0.9966555183946488, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0033444816053511705 }, { "epoch": 2.9665530464422667, "grad_norm": 264.4967429866051, "learning_rate": 3.3295849348714117e-07, "loss": 0.2432, "step": 13925, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9689655172413794, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.9035153328347045, "success_rate.epoch.env.math": 0.9762830482115086, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8756359525155455, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8755632506484626, "success_rate.epoch.global": 0.9173106646058733, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9199999999999999, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942857142857143, "tokens_p.mean_below_band": 1.3869794202037156e-11, "tokens_p.mean_in_band": 0.7005208333333334, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.005494505494505495, "tokens_rate.in_band": 0.03296703296703297 }, { "epoch": 2.9676182360460164, "grad_norm": 113.52741488629844, "learning_rate": 3.329333409943197e-07, "loss": 0.1583, "step": 13930, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.872852233676976, "success_rate.epoch.env.agentgym:sciworld": 0.9690721649484536, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.9030574198359433, "success_rate.epoch.env.math": 0.9763106796116505, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8754237288135593, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8755145363217779, "success_rate.epoch.global": 0.9171812968681077, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0012608069164266, "tokens_p.mean_in_band": 0.59765625, "tokens_rate.above_band": 0.9914285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008571428571428572 }, { "epoch": 2.9686834256497656, "grad_norm": 282.68784744982264, "learning_rate": 3.329082082274847e-07, "loss": 0.1042, "step": 13935, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.8732876712328768, "success_rate.epoch.env.agentgym:sciworld": 0.9691780821917808, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9492753623188406, "success_rate.epoch.env.logic": 0.9030574198359433, "success_rate.epoch.env.math": 0.9763657497094149, "success_rate.epoch.env.sat": 0.15384615384615385, "success_rate.epoch.env.science": 0.8754940711462451, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.875575151524476, "success_rate.epoch.global": 0.9172725269883234, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981008287292817, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9945054945054945, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005494505494505495 }, { "epoch": 2.969748615253515, "grad_norm": 155.6263380918352, "learning_rate": 3.328830952049529e-07, "loss": 0.2128, "step": 13940, "success_rate.epoch.env.abd": 0.9876237623762376, "success_rate.epoch.env.agentgym:alfworld": 0.8737201365187713, "success_rate.epoch.env.agentgym:sciworld": 0.9692832764505119, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9493975903614458, "success_rate.epoch.env.logic": 0.9024571854058079, "success_rate.epoch.env.math": 0.9763931888544891, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8755292125317528, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8753223766215613, "success_rate.epoch.global": 0.917143485915493, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988258670520231, "tokens_p.mean_in_band": 0.5027173913043478, "tokens_rate.above_band": 0.9575645756457565, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042435424354243544 }, { "epoch": 2.9708138048572645, "grad_norm": 180.59142436740416, "learning_rate": 3.328580019450265e-07, "loss": 0.4607, "step": 13945, "success_rate.epoch.env.abd": 0.9876543209876543, "success_rate.epoch.env.agentgym:alfworld": 0.8741496598639455, "success_rate.epoch.env.agentgym:sciworld": 0.9692832764505119, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9493975903614458, "success_rate.epoch.env.logic": 0.9025297619047619, "success_rate.epoch.env.math": 0.9764296754250387, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8756345177664975, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8753836902816375, "success_rate.epoch.global": 0.9172345570455045, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995697463768116, "tokens_p.mean_in_band": 0.8020833333333334, "tokens_rate.above_band": 0.9928057553956835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007194244604316547 }, { "epoch": 2.9718789944610142, "grad_norm": 25.457645429553807, "learning_rate": 3.3283292846599314e-07, "loss": 0.2456, "step": 13950, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9692832764505119, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9493975903614458, "success_rate.epoch.env.logic": 0.9025297619047619, "success_rate.epoch.env.math": 0.9764569664222308, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8758096310898339, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8751354714826316, "success_rate.epoch.global": 0.917215634606939, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974206349206349, "tokens_p.mean_in_band": 0.79609375, "tokens_rate.above_band": 0.984375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015625 }, { "epoch": 2.9729441840647635, "grad_norm": 168.49700040947678, "learning_rate": 3.3280787478612635e-07, "loss": 0.272, "step": 13955, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9493975903614458, "success_rate.epoch.env.logic": 0.9026745913818722, "success_rate.epoch.env.math": 0.976475125337447, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8756680731364276, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8751563514291895, "success_rate.epoch.global": 0.9171967536740513, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9985504314994607, "tokens_p.mean_in_band": 0.58046875, "tokens_rate.above_band": 0.9946351931330472, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00536480686695279 }, { "epoch": 2.974009373668513, "grad_norm": 52.453416326424474, "learning_rate": 3.32782840923685e-07, "loss": 0.183, "step": 13960, "success_rate.epoch.env.abd": 0.9876847290640394, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9495192307692307, "success_rate.epoch.env.logic": 0.9021497405485545, "success_rate.epoch.env.math": 0.976502311248074, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8752107925801011, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8750805964227137, "success_rate.epoch.global": 0.9169588080631026, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976728723404256, "tokens_p.mean_below_band": 5.3085386753082275e-08, "tokens_p.mean_in_band": 0.6656494140625, "tokens_rate.above_band": 0.9343936381709742, "tokens_rate.below_band": 0.0019880715705765406, "tokens_rate.in_band": 0.0636182902584493 }, { "epoch": 2.9750745632722624, "grad_norm": 66.7103815478627, "learning_rate": 3.327578268969136e-07, "loss": 0.1388, "step": 13965, "success_rate.epoch.env.abd": 0.9877149877149877, "success_rate.epoch.env.agentgym:alfworld": 0.8711864406779661, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9495192307692307, "success_rate.epoch.env.logic": 0.9021497405485545, "success_rate.epoch.env.math": 0.9761538461538461, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8753859107493686, "success_rate.epoch.env.webshop": 0.98, "success_rate.epoch.env_macro_mean": 0.8750675883978036, "success_rate.epoch.global": 0.9169402495075509, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946209016393442, "tokens_p.mean_in_band": 0.2890625, "tokens_rate.above_band": 0.991869918699187, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008130081300813009 }, { "epoch": 2.976139752876012, "grad_norm": 140.60219152502108, "learning_rate": 3.327328327240421e-07, "loss": 0.2018, "step": 13970, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9023668639053254, "success_rate.epoch.env.math": 0.9761721752498078, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8751402918069585, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8751556191833949, "success_rate.epoch.global": 0.9169217315260166, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970695020746888, "tokens_p.mean_in_band": 0.57421875, "tokens_rate.above_band": 0.9934047815333883, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006595218466611707 }, { "epoch": 2.9772049424797613, "grad_norm": 110.16844687673188, "learning_rate": 3.327078584232863e-07, "loss": 0.347, "step": 13975, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9496402877697842, "success_rate.epoch.env.logic": 0.9025110782865583, "success_rate.epoch.env.math": 0.9761813292354975, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8751050126015122, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.875166354561711, "success_rate.epoch.global": 0.9169032539855864, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9901620370370371, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9152542372881356, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0847457627118644 }, { "epoch": 2.978270132083511, "grad_norm": 55.896515646269314, "learning_rate": 3.326829040128471e-07, "loss": 0.311, "step": 13980, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9025110782865583, "success_rate.epoch.env.math": 0.9762178749520521, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8747203579418344, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8749281749562314, "success_rate.epoch.global": 0.9166666666666666, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9951923076923077, "tokens_p.mean_below_band": 8.307397365570068e-07, "tokens_p.mean_in_band": 0.47934027777777777, "tokens_rate.above_band": 0.9534225424601133, "tokens_rate.below_band": 0.0002573340195573855, "tokens_rate.in_band": 0.04632012352032939 }, { "epoch": 2.9793353216872602, "grad_norm": 68.14596240797243, "learning_rate": 3.3265796951091117e-07, "loss": 0.1403, "step": 13985, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.8716216216216216, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9473684210526315, "success_rate.epoch.env.logic": 0.9027982326951399, "success_rate.epoch.env.math": 0.9762269938650306, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.874616029042167, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8749456244491306, "success_rate.epoch.global": 0.9166485073000654, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9924568965517241, "tokens_p.mean_in_band": 0.6927083333333334, "tokens_rate.above_band": 0.8787878787878788, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12121212121212122 }, { "epoch": 2.98040051129101, "grad_norm": 97.65722158025633, "learning_rate": 3.326330549356505e-07, "loss": 0.329, "step": 13990, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9474940334128878, "success_rate.epoch.env.logic": 0.9027982326951399, "success_rate.epoch.env.math": 0.9762633996937213, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8747209821428571, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8750482218259058, "success_rate.epoch.global": 0.9167392250761863, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9984509056244042, "tokens_p.mean_in_band": 0.7783203125, "tokens_rate.above_band": 0.9924314096499527, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007568590350047304 }, { "epoch": 2.981465700894759, "grad_norm": 40.76710571834433, "learning_rate": 3.3260816030522277e-07, "loss": 0.1395, "step": 13995, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9694915254237289, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9474940334128878, "success_rate.epoch.env.logic": 0.9030837004405287, "success_rate.epoch.env.math": 0.9762815608263198, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8745819397993311, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8750631842381295, "success_rate.epoch.global": 0.9167210263100674, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960443037974683, "tokens_p.mean_in_band": 0.7552083333333334, "tokens_rate.above_band": 0.9634146341463414, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036585365853658534 }, { "epoch": 2.982530890498509, "grad_norm": 0.0, "learning_rate": 3.32583285637771e-07, "loss": 0.1259, "step": 14000, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9695945945945946, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9795918367346939, "success_rate.epoch.env.ded": 0.9474940334128878, "success_rate.epoch.env.logic": 0.9031548055759354, "success_rate.epoch.env.math": 0.9763358778625955, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8746518105849582, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8750903080679635, "success_rate.epoch.global": 0.9168114682884448, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947615606936416, "tokens_p.mean_in_band": 0.78173828125, "tokens_rate.above_band": 0.9558011049723757, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04419889502762431 }, { "epoch": 2.983596080102258, "grad_norm": 64.45447562635663, "learning_rate": 3.325584309514236e-07, "loss": 0.2547, "step": 14005, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9474940334128878, "success_rate.epoch.env.logic": 0.9031548055759354, "success_rate.epoch.env.math": 0.9763719512195121, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8745130773511408, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8751273879307621, "success_rate.epoch.global": 0.9167932306357127, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972067039106145, "tokens_p.mean_in_band": 0.421875, "tokens_rate.above_band": 0.9728260869565217, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02717391304347826 }, { "epoch": 2.9846612697060078, "grad_norm": 222.73755438116066, "learning_rate": 3.3253359626429455e-07, "loss": 0.2687, "step": 14010, "success_rate.epoch.env.abd": 0.9877450980392157, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9476190476190476, "success_rate.epoch.env.logic": 0.9031548055759354, "success_rate.epoch.env.math": 0.9760456273764259, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8743397275507367, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8750933279819137, "success_rate.epoch.global": 0.9166666666666666, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971590909090909, "tokens_p.mean_in_band": 0.5094401041666666, "tokens_rate.above_band": 0.9540816326530612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04591836734693878 }, { "epoch": 2.985726459309757, "grad_norm": 32.947884218776494, "learning_rate": 3.3250878159448317e-07, "loss": 0.4146, "step": 14015, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9696969696969697, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9476190476190476, "success_rate.epoch.env.logic": 0.9034381858083395, "success_rate.epoch.env.math": 0.976063829787234, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8741666666666666, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8751077356950611, "success_rate.epoch.global": 0.9166486252435592, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972826086956522, "tokens_p.mean_in_band": 0.6493055555555556, "tokens_rate.above_band": 0.9470588235294117, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.052941176470588235 }, { "epoch": 2.9867916489135067, "grad_norm": 399.25992896654066, "learning_rate": 3.324839869600742e-07, "loss": 0.3641, "step": 14020, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9698996655518395, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9476190476190476, "success_rate.epoch.env.logic": 0.9034381858083395, "success_rate.epoch.env.math": 0.9761092150170648, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8739938939772411, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8751145819128133, "success_rate.epoch.global": 0.9166306228373703, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9981374172185431, "tokens_p.mean_in_band": 0.6109375, "tokens_rate.above_band": 0.967948717948718, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03205128205128205 }, { "epoch": 2.987856838517256, "grad_norm": 60.66138842400301, "learning_rate": 3.324592123791377e-07, "loss": 0.2089, "step": 14025, "success_rate.epoch.env.abd": 0.9877750611246944, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9698996655518395, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9476190476190476, "success_rate.epoch.env.logic": 0.9035792549306063, "success_rate.epoch.env.math": 0.9761273209549072, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8739262953726794, "success_rate.epoch.env.webshop": 0.9803921568627451, "success_rate.epoch.env_macro_mean": 0.8751229070451358, "success_rate.epoch.global": 0.9166126593216678, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963474025974026, "tokens_p.mean_in_band": 0.64453125, "tokens_rate.above_band": 0.9746835443037974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02531645569620253 }, { "epoch": 2.9889220281210056, "grad_norm": 315.94223566805266, "learning_rate": 3.3243445786972945e-07, "loss": 0.204, "step": 14030, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9478672985781991, "success_rate.epoch.env.logic": 0.9036496350364963, "success_rate.epoch.env.math": 0.9761363636363637, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.874031007751938, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.8752083262637256, "success_rate.epoch.global": 0.9167026327147173, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979328793774319, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9961240310077519, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003875968992248062 }, { "epoch": 2.989987217724755, "grad_norm": 129.66468493805965, "learning_rate": 3.324097234498901e-07, "loss": 0.1164, "step": 14035, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.97, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9481132075471698, "success_rate.epoch.env.logic": 0.9037199124726477, "success_rate.epoch.env.math": 0.9761634506242906, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8741703539823009, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.8752522007749448, "success_rate.epoch.global": 0.9167924121577926, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988924050632911, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 2.9910524073285045, "grad_norm": 137.2455317126776, "learning_rate": 3.323850091376461e-07, "loss": 0.2647, "step": 14040, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.98, "success_rate.epoch.env.ded": 0.9482352941176471, "success_rate.epoch.env.logic": 0.9037900874635568, "success_rate.epoch.env.math": 0.9761904761904762, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8743093922651933, "success_rate.epoch.env.webshop": 0.9807692307692307, "success_rate.epoch.env_macro_mean": 0.8752938365189951, "success_rate.epoch.global": 0.9168819982773471, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9969875145180023, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9953757225433526, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004624277456647399 }, { "epoch": 2.992117596932254, "grad_norm": 142.3021337214023, "learning_rate": 3.3236031495100896e-07, "loss": 0.2017, "step": 14045, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9803921568627451, "success_rate.epoch.env.ded": 0.9482352941176471, "success_rate.epoch.env.logic": 0.9039301310043668, "success_rate.epoch.env.math": 0.9762264150943396, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8743787962451685, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8753847808817096, "success_rate.epoch.global": 0.9169713916971391, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9989820846905537, "tokens_p.mean_in_band": 0.6611328125, "tokens_rate.above_band": 0.9871382636655949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012861736334405145 }, { "epoch": 2.9931827865360034, "grad_norm": 106.325240203674, "learning_rate": 3.323356409079757e-07, "loss": 0.3575, "step": 14050, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9803921568627451, "success_rate.epoch.env.ded": 0.9483568075117371, "success_rate.epoch.env.logic": 0.9034132171387074, "success_rate.epoch.env.math": 0.9762264150943396, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8745865490628445, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8753677220040829, "success_rate.epoch.global": 0.9169531585732703, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965596330275229, "tokens_p.mean_in_band": 0.7589285714285714, "tokens_rate.above_band": 0.9589442815249267, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04105571847507331 }, { "epoch": 2.9942479761397527, "grad_norm": 113.30351148050276, "learning_rate": 3.323109870265285e-07, "loss": 0.336, "step": 14055, "success_rate.epoch.env.abd": 0.9878048780487805, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9807692307692307, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.9034833091436865, "success_rate.epoch.env.math": 0.9762622456669179, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8744147617736161, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8754070086804027, "success_rate.epoch.global": 0.9169349645846748, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999198717948718, "tokens_p.mean_in_band": 0.4060329861111111, "tokens_rate.above_band": 0.985781990521327, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014218009478672985 }, { "epoch": 2.9953131657435024, "grad_norm": 48.07466739262598, "learning_rate": 3.32286353324635e-07, "loss": 0.249, "step": 14060, "success_rate.epoch.env.abd": 0.9878345498783455, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.9036231884057971, "success_rate.epoch.env.math": 0.9762801204819277, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8745529573590096, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8754695965162708, "success_rate.epoch.global": 0.9170240137221269, "success_rate.window.env.abd": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9939298561151079, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.972027972027972, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027972027972027972 }, { "epoch": 2.996378355347252, "grad_norm": 244.59089699719746, "learning_rate": 3.32261739820248e-07, "loss": 0.2891, "step": 14065, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.87248322147651, "success_rate.epoch.env.agentgym:sciworld": 0.9700996677740864, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.9036231884057971, "success_rate.epoch.env.math": 0.9759669545625235, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8743469892768766, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8754250868608179, "success_rate.epoch.global": 0.9168986935103877, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 0.8571428571428571, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958677685950413, "tokens_p.mean_in_band": 0.42083333333333334, "tokens_rate.above_band": 0.8897058823529411, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11029411764705882 }, { "epoch": 2.9974435449510013, "grad_norm": 189.0541955962222, "learning_rate": 3.3223714653130574e-07, "loss": 0.4225, "step": 14070, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.8729096989966555, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.9029688631426502, "success_rate.epoch.env.math": 0.9759939984996249, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8744850315847295, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8754283819721411, "success_rate.epoch.global": 0.9168806161745828, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974188790560472, "tokens_p.mean_in_band": 0.4103732638888889, "tokens_rate.above_band": 0.9741379310344828, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02586206896551724 }, { "epoch": 2.9985087345547505, "grad_norm": 103.4420317712618, "learning_rate": 3.322125734757316e-07, "loss": 0.4346, "step": 14075, "success_rate.epoch.env.abd": 0.9878640776699029, "success_rate.epoch.env.agentgym:alfworld": 0.8733333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9836065573770492, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9484777517564403, "success_rate.epoch.env.logic": 0.9029688631426502, "success_rate.epoch.env.math": 0.9760299625468165, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.8743828853538124, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8754608776224095, "success_rate.epoch.global": 0.9168625774738192, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99797197640118, "tokens_p.mean_in_band": 0.5888671875, "tokens_rate.above_band": 0.9769452449567724, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023054755043227664 }, { "epoch": 2.9995739241585, "grad_norm": 178.74036372896242, "learning_rate": 3.3218802067143415e-07, "loss": 0.2448, "step": 14080, "success_rate.epoch.env.abd": 0.9878934624697336, "success_rate.epoch.env.agentgym:alfworld": 0.8733333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.9701986754966887, "success_rate.epoch.env.agentgym:textcraft": 0.9838709677419355, "success_rate.epoch.env.babyai": 0.9811320754716981, "success_rate.epoch.env.ded": 0.9485981308411215, "success_rate.epoch.env.logic": 0.9030390738060782, "success_rate.epoch.env.math": 0.9760479041916168, "success_rate.epoch.env.sat": 0.1509433962264151, "success_rate.epoch.env.science": 0.873972602739726, "success_rate.epoch.env.webshop": 0.9811320754716981, "success_rate.epoch.env_macro_mean": 0.8754692452536404, "success_rate.epoch.global": 0.9167378309137489, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976535836177475, "tokens_p.mean_in_band": 0.6021205357142857, "tokens_rate.above_band": 0.9766666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023333333333333334 } ], "logging_steps": 5, "max_steps": 18776, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 351041909738496.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }